From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:27:49 +0200 Subject: Adding upstream version 6.6.15. Signed-off-by: Daniel Baumann --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 191 + drivers/net/ethernet/mellanox/mlx5/core/Makefile | 130 + drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 250 + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2312 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/cq.c | 228 + drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 603 ++ drivers/net/ethernet/mellanox/mlx5/core/dev.c | 632 ++ drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 860 +++ drivers/net/ethernet/mellanox/mlx5/core/devlink.h | 68 + .../mellanox/mlx5/core/diag/cmd_tracepoint.h | 45 + .../net/ethernet/mellanox/mlx5/core/diag/crdump.c | 115 + .../mellanox/mlx5/core/diag/en_rep_tracepoint.h | 54 + .../mellanox/mlx5/core/diag/en_tc_tracepoint.c | 58 + .../mellanox/mlx5/core/diag/en_tc_tracepoint.h | 114 + .../mellanox/mlx5/core/diag/fs_tracepoint.c | 299 + .../mellanox/mlx5/core/diag/fs_tracepoint.h | 323 + .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 1215 ++++ .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h | 205 + .../mellanox/mlx5/core/diag/fw_tracer_tracepoint.h | 79 + .../mellanox/mlx5/core/diag/reporter_vnic.c | 153 + .../mellanox/mlx5/core/diag/reporter_vnic.h | 16 + .../ethernet/mellanox/mlx5/core/diag/rsc_dump.c | 311 + .../ethernet/mellanox/mlx5/core/diag/rsc_dump.h | 27 + drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 97 + drivers/net/ethernet/mellanox/mlx5/core/ecpf.h | 32 + drivers/net/ethernet/mellanox/mlx5/core/en.h | 1221 ++++ .../net/ethernet/mellanox/mlx5/core/en/channels.c | 51 + .../net/ethernet/mellanox/mlx5/core/en/channels.h | 17 + drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h | 53 + .../net/ethernet/mellanox/mlx5/core/en/devlink.c | 74 + .../net/ethernet/mellanox/mlx5/core/en/devlink.h | 17 + drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 209 + .../ethernet/mellanox/mlx5/core/en/fs_ethtool.h | 29 + .../mellanox/mlx5/core/en/fs_tt_redirect.c | 615 ++ .../mellanox/mlx5/core/en/fs_tt_redirect.h | 25 + .../net/ethernet/mellanox/mlx5/core/en/health.c | 339 ++ .../net/ethernet/mellanox/mlx5/core/en/health.h | 59 + drivers/net/ethernet/mellanox/mlx5/core/en/htb.c | 722 +++ drivers/net/ethernet/mellanox/mlx5/core/en/htb.h | 46 + .../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 159 + .../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 18 + .../net/ethernet/mellanox/mlx5/core/en/mapping.c | 263 + .../net/ethernet/mellanox/mlx5/core/en/mapping.h | 32 + .../net/ethernet/mellanox/mlx5/core/en/mod_hdr.c | 216 + .../net/ethernet/mellanox/mlx5/core/en/mod_hdr.h | 57 + .../ethernet/mellanox/mlx5/core/en/monitor_stats.c | 151 + .../ethernet/mellanox/mlx5/core/en/monitor_stats.h | 12 + .../net/ethernet/mellanox/mlx5/core/en/params.c | 1312 +++++ .../net/ethernet/mellanox/mlx5/core/en/params.h | 175 + drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 515 ++ drivers/net/ethernet/mellanox/mlx5/core/en/port.h | 68 + .../ethernet/mellanox/mlx5/core/en/port_buffer.c | 590 ++ .../ethernet/mellanox/mlx5/core/en/port_buffer.h | 77 + drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 1012 ++++ drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h | 147 + drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 523 ++ drivers/net/ethernet/mellanox/mlx5/core/en/qos.h | 51 + .../net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 351 ++ .../ethernet/mellanox/mlx5/core/en/rep/bridge.c | 599 ++ .../ethernet/mellanox/mlx5/core/en/rep/bridge.h | 21 + .../net/ethernet/mellanox/mlx5/core/en/rep/neigh.c | 398 ++ .../net/ethernet/mellanox/mlx5/core/en/rep/neigh.h | 35 + .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 750 +++ .../net/ethernet/mellanox/mlx5/core/en/rep/tc.h | 73 + .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 793 +++ .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 721 +++ drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c | 170 + drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h | 42 + drivers/net/ethernet/mellanox/mlx5/core/en/rss.c | 606 ++ drivers/net/ethernet/mellanox/mlx5/core/en/rss.h | 50 + .../net/ethernet/mellanox/mlx5/core/en/rx_res.c | 655 +++ .../net/ethernet/mellanox/mlx5/core/en/rx_res.h | 70 + drivers/net/ethernet/mellanox/mlx5/core/en/selq.c | 266 + drivers/net/ethernet/mellanox/mlx5/core/en/selq.h | 53 + .../ethernet/mellanox/mlx5/core/en/tc/act/accept.c | 22 + .../ethernet/mellanox/mlx5/core/en/tc/act/act.c | 139 + .../ethernet/mellanox/mlx5/core/en/tc/act/act.h | 124 + .../ethernet/mellanox/mlx5/core/en/tc/act/csum.c | 62 + .../net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c | 64 + .../ethernet/mellanox/mlx5/core/en/tc/act/drop.c | 21 + .../ethernet/mellanox/mlx5/core/en/tc/act/goto.c | 125 + .../ethernet/mellanox/mlx5/core/en/tc/act/mark.c | 36 + .../ethernet/mellanox/mlx5/core/en/tc/act/mirred.c | 337 ++ .../mellanox/mlx5/core/en/tc/act/mirred_nic.c | 52 + .../ethernet/mellanox/mlx5/core/en/tc/act/mpls.c | 99 + .../ethernet/mellanox/mlx5/core/en/tc/act/pedit.c | 111 + .../ethernet/mellanox/mlx5/core/en/tc/act/pedit.h | 30 + .../ethernet/mellanox/mlx5/core/en/tc/act/police.c | 212 + .../ethernet/mellanox/mlx5/core/en/tc/act/ptype.c | 26 + .../mlx5/core/en/tc/act/redirect_ingress.c | 79 + .../ethernet/mellanox/mlx5/core/en/tc/act/sample.c | 51 + .../ethernet/mellanox/mlx5/core/en/tc/act/sample.h | 14 + .../ethernet/mellanox/mlx5/core/en/tc/act/trap.c | 22 + .../ethernet/mellanox/mlx5/core/en/tc/act/tun.c | 53 + .../ethernet/mellanox/mlx5/core/en/tc/act/vlan.c | 202 + .../ethernet/mellanox/mlx5/core/en/tc/act/vlan.h | 29 + .../mellanox/mlx5/core/en/tc/act/vlan_mangle.c | 78 + .../ethernet/mellanox/mlx5/core/en/tc/act_stats.c | 199 + .../ethernet/mellanox/mlx5/core/en/tc/act_stats.h | 27 + .../net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h | 49 + .../ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c | 79 + .../ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c | 385 ++ .../ethernet/mellanox/mlx5/core/en/tc/int_port.c | 457 ++ .../ethernet/mellanox/mlx5/core/en/tc/int_port.h | 65 + .../net/ethernet/mellanox/mlx5/core/en/tc/meter.c | 595 ++ .../net/ethernet/mellanox/mlx5/core/en/tc/meter.h | 75 + .../ethernet/mellanox/mlx5/core/en/tc/post_act.c | 181 + .../ethernet/mellanox/mlx5/core/en/tc/post_act.h | 43 + .../ethernet/mellanox/mlx5/core/en/tc/post_meter.c | 460 ++ .../ethernet/mellanox/mlx5/core/en/tc/post_meter.h | 62 + .../net/ethernet/mellanox/mlx5/core/en/tc/sample.c | 655 +++ .../net/ethernet/mellanox/mlx5/core/en/tc/sample.h | 66 + drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2327 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 206 + .../net/ethernet/mellanox/mlx5/core/en/tc_priv.h | 212 + .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 991 ++++ .../net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 123 + .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 1885 ++++++ .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.h | 50 + .../ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 353 ++ .../ethernet/mellanox/mlx5/core/en/tc_tun_gre.c | 98 + .../mellanox/mlx5/core/en/tc_tun_mplsoudp.c | 128 + .../ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 233 + drivers/net/ethernet/mellanox/mlx5/core/en/tir.c | 203 + drivers/net/ethernet/mellanox/mlx5/core/en/tir.h | 58 + drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 330 ++ drivers/net/ethernet/mellanox/mlx5/core/en/trap.h | 37 + drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 509 ++ drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 909 +++ drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 241 + .../net/ethernet/mellanox/mlx5/core/en/xsk/pool.c | 230 + .../net/ethernet/mellanox/mlx5/core/en/xsk/pool.h | 27 + .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 337 ++ .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h | 25 + .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 212 + .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.h | 21 + .../net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 123 + .../net/ethernet/mellanox/mlx5/core/en/xsk/tx.h | 15 + .../mellanox/mlx5/core/en_accel/en_accel.h | 222 + .../ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 404 ++ .../ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h | 27 + .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 1191 ++++ .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 368 ++ .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 2097 +++++++ .../mellanox/mlx5/core/en_accel/ipsec_offload.c | 627 ++ .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 381 ++ .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 171 + .../mellanox/mlx5/core/en_accel/ipsec_stats.c | 136 + .../ethernet/mellanox/mlx5/core/en_accel/ktls.c | 220 + .../ethernet/mellanox/mlx5/core/en_accel/ktls.h | 156 + .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 785 +++ .../mellanox/mlx5/core/en_accel/ktls_stats.c | 92 + .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 958 +++ .../mellanox/mlx5/core/en_accel/ktls_txrx.c | 138 + .../mellanox/mlx5/core/en_accel/ktls_txrx.h | 93 + .../mellanox/mlx5/core/en_accel/ktls_utils.h | 91 + .../ethernet/mellanox/mlx5/core/en_accel/macsec.c | 1775 ++++++ .../ethernet/mellanox/mlx5/core/en_accel/macsec.h | 53 + .../mellanox/mlx5/core/en_accel/macsec_stats.c | 74 + drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 776 +++ .../net/ethernet/mellanox/mlx5/core/en_common.c | 181 + drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 1257 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 62 + .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2451 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 1600 +++++ .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1024 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6151 ++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1728 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 286 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2774 +++++++++ .../net/ethernet/mellanox/mlx5/core/en_selftest.c | 371 ++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 2513 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 523 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5760 ++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 409 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 1051 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 274 + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 1261 ++++ .../net/ethernet/mellanox/mlx5/core/esw/Makefile | 2 + .../mellanox/mlx5/core/esw/acl/egress_lgcy.c | 178 + .../mellanox/mlx5/core/esw/acl/egress_ofld.c | 274 + .../ethernet/mellanox/mlx5/core/esw/acl/helper.c | 164 + .../ethernet/mellanox/mlx5/core/esw/acl/helper.h | 26 + .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 305 + .../mellanox/mlx5/core/esw/acl/ingress_ofld.c | 408 ++ .../net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h | 17 + .../net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h | 45 + .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 1952 +++++++ .../net/ethernet/mellanox/mlx5/core/esw/bridge.h | 97 + .../mellanox/mlx5/core/esw/bridge_debugfs.c | 89 + .../ethernet/mellanox/mlx5/core/esw/bridge_mcast.c | 1134 ++++ .../ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 251 + .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 206 + .../mlx5/core/esw/diag/bridge_tracepoint.h | 155 + .../mellanox/mlx5/core/esw/diag/qos_tracepoint.h | 123 + .../ethernet/mellanox/mlx5/core/esw/indir_table.c | 382 ++ .../ethernet/mellanox/mlx5/core/esw/indir_table.h | 72 + .../net/ethernet/mellanox/mlx5/core/esw/ipsec.c | 369 ++ .../net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 173 + .../net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h | 52 + .../net/ethernet/mellanox/mlx5/core/esw/legacy.c | 527 ++ .../net/ethernet/mellanox/mlx5/core/esw/legacy.h | 22 + drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 943 +++ drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h | 31 + .../net/ethernet/mellanox/mlx5/core/esw/vporttbl.c | 150 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2398 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 959 +++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4577 +++++++++++++++ .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 335 ++ drivers/net/ethernet/mellanox/mlx5/core/events.c | 448 ++ drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c | 235 + drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h | 91 + .../net/ethernet/mellanox/mlx5/core/fpga/conn.c | 1001 ++++ .../net/ethernet/mellanox/mlx5/core/fpga/conn.h | 96 + .../net/ethernet/mellanox/mlx5/core/fpga/core.c | 375 ++ .../net/ethernet/mellanox/mlx5/core/fpga/core.h | 111 + drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c | 170 + drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h | 214 + drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 1148 ++++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 125 + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3780 ++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 374 ++ .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 772 +++ .../net/ethernet/mellanox/mlx5/core/fs_ft_pool.c | 85 + .../net/ethernet/mellanox/mlx5/core/fs_ft_pool.h | 21 + drivers/net/ethernet/mellanox/mlx5/core/fw.c | 845 +++ drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 755 +++ drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h | 23 + drivers/net/ethernet/mellanox/mlx5/core/health.c | 928 +++ drivers/net/ethernet/mellanox/mlx5/core/hwmon.c | 418 ++ drivers/net/ethernet/mellanox/mlx5/core/hwmon.h | 24 + .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 296 + .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 848 +++ .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 127 + .../ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 369 ++ .../net/ethernet/mellanox/mlx5/core/irq_affinity.c | 170 + .../net/ethernet/mellanox/mlx5/core/lag/debugfs.c | 182 + drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 1620 ++++++ drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 126 + drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 385 ++ drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h | 42 + .../net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 214 + .../net/ethernet/mellanox/mlx5/core/lag/mpesw.h | 35 + .../net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 637 ++ .../net/ethernet/mellanox/mlx5/core/lag/port_sel.h | 53 + drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c | 432 ++ drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h | 92 + .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 1088 ++++ .../net/ethernet/mellanox/mlx5/core/lib/clock.h | 121 + .../net/ethernet/mellanox/mlx5/core/lib/crypto.c | 774 +++ .../net/ethernet/mellanox/mlx5/core/lib/crypto.h | 34 + .../net/ethernet/mellanox/mlx5/core/lib/devcom.c | 385 ++ .../net/ethernet/mellanox/mlx5/core/lib/devcom.h | 54 + drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c | 267 + drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 109 + .../net/ethernet/mellanox/mlx5/core/lib/events.h | 40 + .../ethernet/mellanox/mlx5/core/lib/fs_chains.c | 807 +++ .../ethernet/mellanox/mlx5/core/lib/fs_chains.h | 101 + .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 608 ++ .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h | 72 + .../net/ethernet/mellanox/mlx5/core/lib/geneve.c | 158 + .../net/ethernet/mellanox/mlx5/core/lib/geneve.h | 33 + drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c | 152 + drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 + drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 + .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 371 ++ .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 97 + .../mellanox/mlx5/core/lib/ipsec_fs_roce.c | 371 ++ .../mellanox/mlx5/core/lib/ipsec_fs_roce.h | 25 + .../ethernet/mellanox/mlx5/core/lib/macsec_fs.c | 2411 ++++++++ .../ethernet/mellanox/mlx5/core/lib/macsec_fs.h | 64 + drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 57 + drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 211 + drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h | 92 + .../net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c | 316 + .../net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h | 32 + .../net/ethernet/mellanox/mlx5/core/lib/port_tun.c | 186 + .../net/ethernet/mellanox/mlx5/core/lib/port_tun.h | 24 + drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h | 45 + drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c | 68 + drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h | 36 + drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 162 + drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h | 43 + .../net/ethernet/mellanox/mlx5/core/lib/vxlan.c | 196 + .../net/ethernet/mellanox/mlx5/core/lib/vxlan.h | 70 + drivers/net/ethernet/mellanox/mlx5/core/main.c | 2340 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 62 + .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 374 ++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 66 + drivers/net/ethernet/mellanox/mlx5/core/mr.c | 145 + .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 805 +++ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 840 +++ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 44 + drivers/net/ethernet/mellanox/mlx5/core/pd.c | 59 + drivers/net/ethernet/mellanox/mlx5/core/port.c | 1207 ++++ drivers/net/ethernet/mellanox/mlx5/core/qos.c | 84 + drivers/net/ethernet/mellanox/mlx5/core/qos.h | 30 + drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 174 + drivers/net/ethernet/mellanox/mlx5/core/rdma.h | 20 + drivers/net/ethernet/mellanox/mlx5/core/rl.c | 398 ++ drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c | 49 + .../net/ethernet/mellanox/mlx5/core/sf/dev/dev.c | 380 ++ .../net/ethernet/mellanox/mlx5/core/sf/dev/dev.h | 56 + .../mlx5/core/sf/dev/diag/dev_tracepoint.h | 58 + .../ethernet/mellanox/mlx5/core/sf/dev/driver.c | 115 + .../net/ethernet/mellanox/mlx5/core/sf/devlink.c | 571 ++ .../mellanox/mlx5/core/sf/diag/sf_tracepoint.h | 173 + .../mellanox/mlx5/core/sf/diag/vhca_tracepoint.h | 40 + .../net/ethernet/mellanox/mlx5/core/sf/hw_table.c | 399 ++ .../mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h | 82 + drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h | 22 + drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h | 66 + .../ethernet/mellanox/mlx5/core/sf/vhca_event.c | 191 + .../ethernet/mellanox/mlx5/core/sf/vhca_event.h | 56 + drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 376 ++ .../ethernet/mellanox/mlx5/core/steering/Makefile | 2 + .../mellanox/mlx5/core/steering/dr_action.c | 2217 +++++++ .../ethernet/mellanox/mlx5/core/steering/dr_arg.c | 273 + .../mellanox/mlx5/core/steering/dr_buddy.c | 168 + .../ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 970 +++ .../ethernet/mellanox/mlx5/core/steering/dr_dbg.c | 720 +++ .../ethernet/mellanox/mlx5/core/steering/dr_dbg.h | 15 + .../mellanox/mlx5/core/steering/dr_definer.c | 151 + .../mellanox/mlx5/core/steering/dr_domain.c | 579 ++ .../ethernet/mellanox/mlx5/core/steering/dr_fw.c | 171 + .../mellanox/mlx5/core/steering/dr_icm_pool.c | 576 ++ .../mellanox/mlx5/core/steering/dr_matcher.c | 1108 ++++ .../ethernet/mellanox/mlx5/core/steering/dr_ptrn.c | 244 + .../ethernet/mellanox/mlx5/core/steering/dr_rule.c | 1377 +++++ .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 1368 +++++ .../ethernet/mellanox/mlx5/core/steering/dr_ste.c | 1463 +++++ .../ethernet/mellanox/mlx5/core/steering/dr_ste.h | 209 + .../mellanox/mlx5/core/steering/dr_ste_v0.c | 1962 +++++++ .../mellanox/mlx5/core/steering/dr_ste_v1.c | 2341 ++++++++ .../mellanox/mlx5/core/steering/dr_ste_v1.h | 97 + .../mellanox/mlx5/core/steering/dr_ste_v2.c | 234 + .../mellanox/mlx5/core/steering/dr_table.c | 319 + .../mellanox/mlx5/core/steering/dr_types.h | 1597 +++++ .../ethernet/mellanox/mlx5/core/steering/fs_dr.c | 873 +++ .../ethernet/mellanox/mlx5/core/steering/fs_dr.h | 63 + .../mellanox/mlx5/core/steering/mlx5_ifc_dr.h | 603 ++ .../mlx5/core/steering/mlx5_ifc_dr_ste_v1.h | 469 ++ .../ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 201 + drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 505 ++ drivers/net/ethernet/mellanox/mlx5/core/uar.c | 327 ++ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 1205 ++++ drivers/net/ethernet/mellanox/mlx5/core/wq.c | 261 + drivers/net/ethernet/mellanox/mlx5/core/wq.h | 325 ++ 348 files changed, 154829 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/Kconfig create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/Makefile create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/alloc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/cq.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/dev.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/devlink.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/devlink.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/channels.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/channels.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/health.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/health.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/htb.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/htb.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/params.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/params.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/qos.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/qos.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rss.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rss.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/selq.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/selq.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tir.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tir.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/trap.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_dim.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_main.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eq.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/events.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fw.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/health.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/hwmon.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/hwmon.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/events.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/main.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mcg.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mr.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/pd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/port.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/qos.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/qos.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rl.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sriov.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/transobj.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/uar.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vport.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/wq.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/wq.h (limited to 'drivers/net/ethernet/mellanox/mlx5') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig new file mode 100644 index 0000000000..c4f4de82e2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -0,0 +1,191 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Mellanox driver configuration +# + +config MLX5_CORE + tristate "Mellanox 5th generation network adapters (ConnectX series) core driver" + depends on PCI + select AUXILIARY_BUS + select NET_DEVLINK + depends on VXLAN || !VXLAN + depends on MLXFW || !MLXFW + depends on PTP_1588_CLOCK_OPTIONAL + depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE + depends on HWMON || !HWMON + help + Core driver for low level functionality of the ConnectX-4 and + Connect-IB cards by Mellanox Technologies. + +config MLX5_FPGA + bool "Mellanox Technologies Innova support" + depends on MLX5_CORE + help + Build support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + +config MLX5_CORE_EN + bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support" + depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE + select PAGE_POOL + select DIMLIB + help + Ethernet support in Mellanox Technologies ConnectX-4 NIC. + +config MLX5_EN_ARFS + bool "Mellanox MLX5 ethernet accelerated receive flow steering (ARFS) support" + depends on MLX5_CORE_EN && RFS_ACCEL + default y + help + Mellanox MLX5 ethernet hardware-accelerated receive flow steering support, + Enables ethernet netdevice arfs support and ntuple filtering. + +config MLX5_EN_RXNFC + bool "Mellanox MLX5 ethernet rx nfc flow steering support" + depends on MLX5_CORE_EN + default y + help + Mellanox MLX5 ethernet rx nfc flow steering support + Enables ethtool receive network flow classification, which allows user defined + flow rules to direct traffic into arbitrary rx queue via ethtool set/get_rxnfc + API. + +config MLX5_MPFS + bool "Mellanox Technologies MLX5 MPFS support" + depends on MLX5_CORE_EN + default y + help + Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS) + support in ConnectX NIC. MPFs is required for when multi-PF configuration + is enabled to allow passing user configured unicast MAC addresses to the + requesting PF. + +config MLX5_ESWITCH + bool "Mellanox Technologies MLX5 SRIOV E-Switch support" + depends on MLX5_CORE_EN && NET_SWITCHDEV + default y + help + Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC. + E-Switch provides internal SRIOV packet steering and switching for the + enabled VFs and PF in two available modes: + Legacy SRIOV mode (L2 mac vlan steering based). + Switchdev mode (eswitch offloads). + +config MLX5_BRIDGE + bool + depends on MLX5_ESWITCH && BRIDGE + default y + help + mlx5 ConnectX offloads support for Ethernet Bridging (BRIDGE). + Enable adding representors of mlx5 uplink and VF ports to Bridge and + offloading rules for traffic between such ports. Supports VLANs (trunk and + access modes). + +config MLX5_CLS_ACT + bool "MLX5 TC classifier action support" + depends on MLX5_ESWITCH && NET_CLS_ACT && NET_TC_SKB_EXT + default y + help + mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT), + works in both native NIC mode and Switchdev SRIOV mode. + Actions get attached to a Hardware offloaded classifiers and are + invoked after a successful classification. Actions are used to + overwrite the classification result, instantly drop or redirect and/or + reformat packets in wire speeds without involving the host cpu. + + If set to N, TC offloads in both NIC and switchdev modes will be disabled. + If unsure, set to Y + +config MLX5_TC_CT + bool "MLX5 TC connection tracking offload support" + depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT + default y + help + Say Y here if you want to support offloading connection tracking rules + via tc ct action. + + If unsure, set to Y + +config MLX5_TC_SAMPLE + bool "MLX5 TC sample offload support" + depends on MLX5_CLS_ACT + depends on PSAMPLE=y || PSAMPLE=n || MLX5_CORE=m + default y + help + Say Y here if you want to support offloading sample rules via tc + sample action. + If set to N, will not be able to configure tc rules with sample + action. + + If unsure, set to Y + +config MLX5_CORE_EN_DCB + bool "Data Center Bridging (DCB) Support" + default y + depends on MLX5_CORE_EN && DCB + help + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. + If set to N, will not be able to configure QoS and ratelimit attributes. + This flag is depended on the kernel's DCB support. + + If unsure, set to Y + +config MLX5_CORE_IPOIB + bool "Mellanox 5th generation network adapters (connectX series) IPoIB offloads support" + depends on MLX5_CORE_EN + help + MLX5 IPoIB offloads & acceleration support. + +config MLX5_MACSEC + bool "Connect-X support for MACSec offload" + depends on MLX5_CORE_EN + depends on MACSEC + default n + help + Build support for MACsec cryptography-offload acceleration in the NIC. + +config MLX5_EN_IPSEC + bool "Mellanox Technologies IPsec Connect-X support" + depends on MLX5_CORE_EN + depends on XFRM_OFFLOAD + depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + help + Build support for IPsec cryptography-offload acceleration in the NIC. + +config MLX5_EN_TLS + bool "Mellanox Technologies TLS Connect-X support" + depends on TLS_DEVICE + depends on TLS=y || MLX5_CORE=m + depends on MLX5_CORE_EN + help + Build support for TLS cryptography-offload acceleration in the NIC. + +config MLX5_SW_STEERING + bool "Mellanox Technologies software-managed steering" + depends on MLX5_CORE_EN && MLX5_ESWITCH + select CRC32 + default y + help + Build support for software-managed steering in the NIC. + +config MLX5_SF + bool "Mellanox Technologies subfunction device support using auxiliary device" + depends on MLX5_CORE && MLX5_CORE_EN + help + Build support for subfuction device in the NIC. A Mellanox subfunction + device can support RDMA, netdevice and vdpa device. + It is similar to a SRIOV VF but it doesn't require SRIOV support. + +config MLX5_SF_MANAGER + bool + depends on MLX5_SF && MLX5_ESWITCH + default y + help + Build support for subfuction port in the NIC. A Mellanox subfunction + port is managed through devlink. A subfunction supports RDMA, netdevice + and vdpa device. It is similar to a SRIOV VF but it doesn't require + SRIOV support. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile new file mode 100644 index 0000000000..7e94caca48 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -0,0 +1,130 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Mellanox 5th generation network adapters +# (ConnectX series) core & netdev driver +# + +subdir-ccflags-y += -I$(src) + +obj-$(CONFIG_MLX5_CORE) += mlx5_core.o + +# +# mlx5 core basic +# +mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ + health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ + fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ + lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ + fw_reset.o qos.o lib/tout.o lib/aso.o + +# +# Netdev basic +# +mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ + en/channels.o en_main.o en_common.o en_fs.o en_ethtool.o \ + en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ + en_selftest.o en/port.o en/monitor_stats.o en/health.o \ + en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ + en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \ + lib/crypto.o + +# +# Netdev extra +# +mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o +mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o +mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \ + en_rep.o en/rep/bond.o en/mod_hdr.o \ + en/mapping.o lag/mpesw.o +mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ + lib/fs_chains.o en/tc_tun.o \ + esw/indir_table.o en/tc_tun_encap.o \ + en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ + en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \ + en/tc/post_act.o en/tc/int_port.o en/tc/meter.o \ + en/tc/post_meter.o en/tc/act_stats.o + +mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/act/trap.o \ + en/tc/act/accept.o en/tc/act/mark.o en/tc/act/goto.o \ + en/tc/act/tun.o en/tc/act/csum.o en/tc/act/pedit.o \ + en/tc/act/vlan.o en/tc/act/vlan_mangle.o en/tc/act/mpls.o \ + en/tc/act/mirred.o en/tc/act/mirred_nic.o \ + en/tc/act/ct.o en/tc/act/sample.o en/tc/act/ptype.o \ + en/tc/act/redirect_ingress.o en/tc/act/police.o + +ifneq ($(CONFIG_MLX5_TC_CT),) + mlx5_core-y += en/tc_ct.o en/tc/ct_fs_dmfs.o + mlx5_core-$(CONFIG_MLX5_SW_STEERING) += en/tc/ct_fs_smfs.o +endif + +mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o + +# +# Core extra +# +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ + ecpf.o rdma.o esw/legacy.o \ + esw/devlink_port.o esw/vporttbl.o esw/qos.o esw/ipsec.o + +mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ + esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ + esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o + +ifneq ($(CONFIG_MLX5_EN_IPSEC),) + mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/ipsec_fs.o +endif + +mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o esw/bridge_debugfs.o \ + en/rep/bridge.o + +mlx5_core-$(CONFIG_HWMON) += hwmon.o +mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o +mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o +mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o +mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o + +# +# Ipoib netdev +# +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o + +# +# Accelerations & FPGA +# +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o + +mlx5_core-$(CONFIG_MLX5_MACSEC) += en_accel/macsec.o lib/macsec_fs.o \ + en_accel/macsec_stats.o + +mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ + en_accel/ipsec_stats.o en_accel/ipsec_fs.o \ + en_accel/ipsec_offload.o lib/ipsec_fs_roce.o + +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \ + en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \ + en_accel/ktls_tx.o en_accel/ktls_rx.o + +mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \ + steering/dr_matcher.o steering/dr_rule.o \ + steering/dr_icm_pool.o steering/dr_buddy.o \ + steering/dr_ste.o steering/dr_send.o \ + steering/dr_ste_v0.o steering/dr_ste_v1.o \ + steering/dr_ste_v2.o \ + steering/dr_cmd.o steering/dr_fw.o \ + steering/dr_action.o steering/fs_dr.o \ + steering/dr_definer.o steering/dr_ptrn.o \ + steering/dr_arg.o steering/dr_dbg.o lib/smfs.o +# +# SF device +# +mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o + +# +# SF manager +# +mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c new file mode 100644 index 0000000000..6aca004e88 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mlx5_core.h" + +struct mlx5_db_pgdir { + struct list_head list; + unsigned long *bitmap; + __be32 *db_page; + dma_addr_t db_dma; +}; + +/* Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. + */ + +static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, + size_t size, dma_addr_t *dma_handle, + int node) +{ + struct device *device = mlx5_core_dma_dev(dev); + struct mlx5_priv *priv = &dev->priv; + int original_node; + void *cpu_handle; + + mutex_lock(&priv->alloc_mutex); + original_node = dev_to_node(device); + set_dev_node(device, node); + cpu_handle = dma_alloc_coherent(device, size, dma_handle, + GFP_KERNEL); + set_dev_node(device, original_node); + mutex_unlock(&priv->alloc_mutex); + return cpu_handle; +} + +int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node) +{ + int i; + + buf->size = size; + buf->npages = DIV_ROUND_UP(size, PAGE_SIZE); + buf->page_shift = PAGE_SHIFT; + buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list), + GFP_KERNEL); + if (!buf->frags) + goto err_out; + + for (i = 0; i < buf->npages; i++) { + struct mlx5_buf_list *frag = &buf->frags[i]; + int frag_sz = min_t(int, size, PAGE_SIZE); + + frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz, + &frag->map, node); + if (!frag->buf) + goto err_free_buf; + if (frag->map & ((1 << buf->page_shift) - 1)) { + dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, + buf->frags[i].buf, buf->frags[i].map); + mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n", + &frag->map, buf->page_shift); + goto err_free_buf; + } + size -= frag_sz; + } + + return 0; + +err_free_buf: + while (i--) + dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf, + buf->frags[i].map); + kfree(buf->frags); +err_out: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node); + +void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) +{ + int size = buf->size; + int i; + + for (i = 0; i < buf->npages; i++) { + int frag_sz = min_t(int, size, PAGE_SIZE); + + dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf, + buf->frags[i].map); + size -= frag_sz; + } + kfree(buf->frags); +} +EXPORT_SYMBOL_GPL(mlx5_frag_buf_free); + +static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, + int node) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + struct mlx5_db_pgdir *pgdir; + + pgdir = kzalloc_node(sizeof(*pgdir), GFP_KERNEL, node); + if (!pgdir) + return NULL; + + pgdir->bitmap = bitmap_zalloc_node(db_per_page, GFP_KERNEL, node); + if (!pgdir->bitmap) { + kfree(pgdir); + return NULL; + } + + bitmap_fill(pgdir->bitmap, db_per_page); + + pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, + &pgdir->db_dma, node); + if (!pgdir->db_page) { + bitmap_free(pgdir->bitmap); + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, + struct mlx5_db *db) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + int offset; + int i; + + i = find_first_bit(pgdir->bitmap, db_per_page); + if (i >= db_per_page) + return -ENOMEM; + + __clear_bit(i, pgdir->bitmap); + + db->u.pgdir = pgdir; + db->index = i; + offset = db->index * cache_line_size(); + db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page); + db->dma = pgdir->db_dma + offset; + + db->db[0] = 0; + db->db[1] = 0; + + return 0; +} + +int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node) +{ + struct mlx5_db_pgdir *pgdir; + int ret = 0; + + mutex_lock(&dev->priv.pgdir_mutex); + + list_for_each_entry(pgdir, &dev->priv.pgdir_list, list) + if (!mlx5_alloc_db_from_pgdir(pgdir, db)) + goto out; + + pgdir = mlx5_alloc_db_pgdir(dev, node); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &dev->priv.pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db)); + +out: + mutex_unlock(&dev->priv.pgdir_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx5_db_alloc_node); + +void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + u32 db_per_page = PAGE_SIZE / cache_line_size(); + + mutex_lock(&dev->priv.pgdir_mutex); + + __set_bit(db->index, db->u.pgdir->bitmap); + + if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) { + dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, + db->u.pgdir->db_page, db->u.pgdir->db_dma); + list_del(&db->u.pgdir->list); + bitmap_free(db->u.pgdir->bitmap); + kfree(db->u.pgdir); + } + + mutex_unlock(&dev->priv.pgdir_mutex); +} +EXPORT_SYMBOL_GPL(mlx5_db_free); + +void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm) +{ + int i; + + WARN_ON(perm & 0xfc); + for (i = 0; i < buf->npages; i++) + pas[i] = cpu_to_be64(buf->frags[i].map | perm); +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array_perm); + +void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas) +{ + mlx5_fill_page_frag_array_perm(buf, pas, 0); +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c new file mode 100644 index 0000000000..55efb932ab --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -0,0 +1,2312 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/tout.h" +#define CREATE_TRACE_POINTS +#include "diag/cmd_tracepoint.h" + +struct mlx5_ifc_mbox_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_mbox_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +enum { + CMD_IF_REV = 5, +}; + +enum { + CMD_MODE_POLLING, + CMD_MODE_EVENTS +}; + +enum { + MLX5_CMD_DELIVERY_STAT_OK = 0x0, + MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1, + MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2, + MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3, + MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4, + MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5, + MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6, + MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7, + MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8, + MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9, + MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, +}; + +static u16 in_to_opcode(void *in) +{ + return MLX5_GET(mbox_in, in, opcode); +} + +/* Returns true for opcodes that might be triggered very frequently and throttle + * the command interface. Limit their command slots usage. + */ +static bool mlx5_cmd_is_throttle_opcode(u16 op) +{ + switch (op) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_SYNC_CRYPTO: + return true; + } + return false; +} + +static struct mlx5_cmd_work_ent * +cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t cbk, void *context, int page_queue) +{ + gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; + struct mlx5_cmd_work_ent *ent; + + ent = kzalloc(sizeof(*ent), alloc_flags); + if (!ent) + return ERR_PTR(-ENOMEM); + + ent->idx = -EINVAL; + ent->in = in; + ent->out = out; + ent->uout = uout; + ent->uout_size = uout_size; + ent->callback = cbk; + ent->context = context; + ent->cmd = cmd; + ent->page_queue = page_queue; + ent->op = in_to_opcode(in->first.data); + refcount_set(&ent->refcnt, 1); + + return ent; +} + +static void cmd_free_ent(struct mlx5_cmd_work_ent *ent) +{ + kfree(ent); +} + +static u8 alloc_token(struct mlx5_cmd *cmd) +{ + u8 token; + + spin_lock(&cmd->token_lock); + cmd->token++; + if (cmd->token == 0) + cmd->token++; + token = cmd->token; + spin_unlock(&cmd->token_lock); + + return token; +} + +static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + ret = find_first_bit(&cmd->vars.bitmask, cmd->vars.max_reg_cmds); + if (ret < cmd->vars.max_reg_cmds) { + clear_bit(ret, &cmd->vars.bitmask); + ent->idx = ret; + cmd->ent_arr[ent->idx] = ent; + } + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + + return ret < cmd->vars.max_reg_cmds ? ret : -ENOMEM; +} + +static void cmd_free_index(struct mlx5_cmd *cmd, int idx) +{ + lockdep_assert_held(&cmd->alloc_lock); + set_bit(idx, &cmd->vars.bitmask); +} + +static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) +{ + refcount_inc(&ent->refcnt); +} + +static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_cmd *cmd = ent->cmd; + unsigned long flags; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + if (!refcount_dec_and_test(&ent->refcnt)) + goto out; + + if (ent->idx >= 0) { + cmd_free_index(cmd, ent->idx); + up(ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem); + } + + cmd_free_ent(ent); +out: + spin_unlock_irqrestore(&cmd->alloc_lock, flags); +} + +static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) +{ + return cmd->cmd_buf + (idx << cmd->vars.log_stride); +} + +static int mlx5_calc_cmd_blocks(struct mlx5_cmd_msg *msg) +{ + int size = msg->len; + int blen = size - min_t(int, sizeof(msg->first.data), size); + + return DIV_ROUND_UP(blen, MLX5_CMD_DATA_BLOCK_SIZE); +} + +static u8 xor8_buf(void *buf, size_t offset, int len) +{ + u8 *ptr = buf; + u8 sum = 0; + int i; + int end = len + offset; + + for (i = offset; i < end; i++) + sum ^= ptr[i]; + + return sum; +} + +static int verify_block_sig(struct mlx5_cmd_prot_block *block) +{ + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + int xor_len = sizeof(*block) - sizeof(block->data) - 1; + + if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) + return -EHWPOISON; + + if (xor8_buf(block, 0, sizeof(*block)) != 0xff) + return -EHWPOISON; + + return 0; +} + +static void calc_block_sig(struct mlx5_cmd_prot_block *block) +{ + int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2; + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + + block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len); + block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1); +} + +static void calc_chain_sig(struct mlx5_cmd_msg *msg) +{ + struct mlx5_cmd_mailbox *next = msg->next; + int n = mlx5_calc_cmd_blocks(msg); + int i = 0; + + for (i = 0; i < n && next; i++) { + calc_block_sig(next->buf); + next = next->next; + } +} + +static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) +{ + ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (csum) { + calc_chain_sig(ent->in); + calc_chain_sig(ent->out); + } +} + +static void poll_timeout(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); + u64 cmd_to_ms = mlx5_tout_ms(dev, CMD); + unsigned long poll_end; + u8 own; + + poll_end = jiffies + msecs_to_jiffies(cmd_to_ms + 1000); + + do { + own = READ_ONCE(ent->lay->status_own); + if (!(own & CMD_OWNER_HW)) { + ent->ret = 0; + return; + } + cond_resched(); + } while (time_before(jiffies, poll_end)); + + ent->ret = -ETIMEDOUT; +} + +static int verify_signature(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_cmd_mailbox *next = ent->out->next; + int n = mlx5_calc_cmd_blocks(ent->out); + int err; + u8 sig; + int i = 0; + + sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (sig != 0xff) + return -EHWPOISON; + + for (i = 0; i < n && next; i++) { + err = verify_block_sig(next->buf); + if (err) + return -EHWPOISON; + + next = next->next; + } + + return 0; +} + +static void dump_buf(void *buf, int size, int data_only, int offset, int idx) +{ + __be32 *p = buf; + int i; + + for (i = 0; i < size; i += 16) { + pr_debug("cmd[%d]: %03x: %08x %08x %08x %08x\n", idx, offset, + be32_to_cpu(p[0]), be32_to_cpu(p[1]), + be32_to_cpu(p[2]), be32_to_cpu(p[3])); + p += 4; + offset += 16; + } + if (!data_only) + pr_debug("\n"); +} + +static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, + u32 *synd, u8 *status) +{ + *synd = 0; + *status = 0; + + switch (op) { + case MLX5_CMD_OP_TEARDOWN_HCA: + case MLX5_CMD_OP_DISABLE_HCA: + case MLX5_CMD_OP_MANAGE_PAGES: + case MLX5_CMD_OP_DESTROY_MKEY: + case MLX5_CMD_OP_DESTROY_EQ: + case MLX5_CMD_OP_DESTROY_CQ: + case MLX5_CMD_OP_DESTROY_QP: + case MLX5_CMD_OP_DESTROY_PSV: + case MLX5_CMD_OP_DESTROY_SRQ: + case MLX5_CMD_OP_DESTROY_XRC_SRQ: + case MLX5_CMD_OP_DESTROY_XRQ: + case MLX5_CMD_OP_DESTROY_DCT: + case MLX5_CMD_OP_DEALLOC_Q_COUNTER: + case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: + case MLX5_CMD_OP_DEALLOC_PD: + case MLX5_CMD_OP_DEALLOC_UAR: + case MLX5_CMD_OP_DETACH_FROM_MCG: + case MLX5_CMD_OP_DEALLOC_XRCD: + case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY: + case MLX5_CMD_OP_DESTROY_LAG: + case MLX5_CMD_OP_DESTROY_VPORT_LAG: + case MLX5_CMD_OP_DESTROY_TIR: + case MLX5_CMD_OP_DESTROY_SQ: + case MLX5_CMD_OP_DESTROY_RQ: + case MLX5_CMD_OP_DESTROY_RMP: + case MLX5_CMD_OP_DESTROY_TIS: + case MLX5_CMD_OP_DESTROY_RQT: + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + case MLX5_CMD_OP_DESTROY_FLOW_GROUP: + case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: + case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT: + case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_DESTROY_QP: + case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_DEALLOC_MEMIC: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: + case MLX5_CMD_OP_DEALLOC_SF: + case MLX5_CMD_OP_DESTROY_UCTX: + case MLX5_CMD_OP_DESTROY_UMEM: + case MLX5_CMD_OP_MODIFY_RQT: + return MLX5_CMD_STAT_OK; + + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_INIT_HCA: + case MLX5_CMD_OP_ENABLE_HCA: + case MLX5_CMD_OP_QUERY_PAGES: + case MLX5_CMD_OP_SET_HCA_CAP: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_SET_ISSI: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: + case MLX5_CMD_OP_CREATE_EQ: + case MLX5_CMD_OP_QUERY_EQ: + case MLX5_CMD_OP_GEN_EQE: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_SQD_RTS_QP: + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_CREATE_PSV: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_CREATE_XRQ: + case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_MODIFY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_SET_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_HCA_VPORT_GID: + case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_MONITOR_COUNTER: + case MLX5_CMD_OP_ARM_MONITOR_COUNTER: + case MLX5_CMD_OP_SET_PP_RATE_LIMIT: + case MLX5_CMD_OP_QUERY_RATE_LIMIT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_UAR: + case MLX5_CMD_OP_CONFIG_INT_MODERATION: + case MLX5_CMD_OP_ACCESS_REG: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_MAD_IFC: + case MLX5_CMD_OP_QUERY_MAD_DEMUX: + case MLX5_CMD_OP_SET_MAD_DEMUX: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_ALLOC_XRCD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_MODIFY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_MODIFY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_LAG: + case MLX5_CMD_OP_MODIFY_LAG: + case MLX5_CMD_OP_QUERY_LAG: + case MLX5_CMD_OP_CREATE_VPORT_LAG: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_QUERY_RQT: + + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_FPGA_CREATE_QP: + case MLX5_CMD_OP_FPGA_MODIFY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP: + case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_CREATE_UCTX: + case MLX5_CMD_OP_CREATE_UMEM: + case MLX5_CMD_OP_ALLOC_MEMIC: + case MLX5_CMD_OP_MODIFY_XRQ: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: + case MLX5_CMD_OP_QUERY_VHCA_STATE: + case MLX5_CMD_OP_MODIFY_VHCA_STATE: + case MLX5_CMD_OP_ALLOC_SF: + case MLX5_CMD_OP_SUSPEND_VHCA: + case MLX5_CMD_OP_RESUME_VHCA: + case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE: + case MLX5_CMD_OP_SAVE_VHCA_STATE: + case MLX5_CMD_OP_LOAD_VHCA_STATE: + case MLX5_CMD_OP_SYNC_CRYPTO: + *status = MLX5_DRIVER_STATUS_ABORTED; + *synd = MLX5_DRIVER_SYND; + return -ENOLINK; + default: + mlx5_core_err(dev, "Unknown FW command (%d)\n", op); + return -EINVAL; + } +} + +const char *mlx5_command_str(int command) +{ +#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd + + switch (command) { + MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ADAPTER); + MLX5_COMMAND_STR_CASE(INIT_HCA); + MLX5_COMMAND_STR_CASE(TEARDOWN_HCA); + MLX5_COMMAND_STR_CASE(ENABLE_HCA); + MLX5_COMMAND_STR_CASE(DISABLE_HCA); + MLX5_COMMAND_STR_CASE(QUERY_PAGES); + MLX5_COMMAND_STR_CASE(MANAGE_PAGES); + MLX5_COMMAND_STR_CASE(SET_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ISSI); + MLX5_COMMAND_STR_CASE(SET_ISSI); + MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION); + MLX5_COMMAND_STR_CASE(CREATE_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_MKEY); + MLX5_COMMAND_STR_CASE(DESTROY_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS); + MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME); + MLX5_COMMAND_STR_CASE(CREATE_EQ); + MLX5_COMMAND_STR_CASE(DESTROY_EQ); + MLX5_COMMAND_STR_CASE(QUERY_EQ); + MLX5_COMMAND_STR_CASE(GEN_EQE); + MLX5_COMMAND_STR_CASE(CREATE_CQ); + MLX5_COMMAND_STR_CASE(DESTROY_CQ); + MLX5_COMMAND_STR_CASE(QUERY_CQ); + MLX5_COMMAND_STR_CASE(MODIFY_CQ); + MLX5_COMMAND_STR_CASE(CREATE_QP); + MLX5_COMMAND_STR_CASE(DESTROY_QP); + MLX5_COMMAND_STR_CASE(RST2INIT_QP); + MLX5_COMMAND_STR_CASE(INIT2RTR_QP); + MLX5_COMMAND_STR_CASE(RTR2RTS_QP); + MLX5_COMMAND_STR_CASE(RTS2RTS_QP); + MLX5_COMMAND_STR_CASE(SQERR2RTS_QP); + MLX5_COMMAND_STR_CASE(2ERR_QP); + MLX5_COMMAND_STR_CASE(2RST_QP); + MLX5_COMMAND_STR_CASE(QUERY_QP); + MLX5_COMMAND_STR_CASE(SQD_RTS_QP); + MLX5_COMMAND_STR_CASE(INIT2INIT_QP); + MLX5_COMMAND_STR_CASE(CREATE_PSV); + MLX5_COMMAND_STR_CASE(DESTROY_PSV); + MLX5_COMMAND_STR_CASE(CREATE_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_SRQ); + MLX5_COMMAND_STR_CASE(ARM_RQ); + MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ); + MLX5_COMMAND_STR_CASE(CREATE_DCT); + MLX5_COMMAND_STR_CASE(DESTROY_DCT); + MLX5_COMMAND_STR_CASE(DRAIN_DCT); + MLX5_COMMAND_STR_CASE(QUERY_DCT); + MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY); + MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER); + MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER); + MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER); + MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(ALLOC_PD); + MLX5_COMMAND_STR_CASE(DEALLOC_PD); + MLX5_COMMAND_STR_CASE(ALLOC_UAR); + MLX5_COMMAND_STR_CASE(DEALLOC_UAR); + MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION); + MLX5_COMMAND_STR_CASE(ACCESS_REG); + MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG); + MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG); + MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG); + MLX5_COMMAND_STR_CASE(MAD_IFC); + MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(NOP); + MLX5_COMMAND_STR_CASE(ALLOC_XRCD); + MLX5_COMMAND_STR_CASE(DEALLOC_XRCD); + MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS); + MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(SET_WOL_ROL); + MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL); + MLX5_COMMAND_STR_CASE(CREATE_LAG); + MLX5_COMMAND_STR_CASE(MODIFY_LAG); + MLX5_COMMAND_STR_CASE(QUERY_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_LAG); + MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG); + MLX5_COMMAND_STR_CASE(CREATE_TIR); + MLX5_COMMAND_STR_CASE(MODIFY_TIR); + MLX5_COMMAND_STR_CASE(DESTROY_TIR); + MLX5_COMMAND_STR_CASE(QUERY_TIR); + MLX5_COMMAND_STR_CASE(CREATE_SQ); + MLX5_COMMAND_STR_CASE(MODIFY_SQ); + MLX5_COMMAND_STR_CASE(DESTROY_SQ); + MLX5_COMMAND_STR_CASE(QUERY_SQ); + MLX5_COMMAND_STR_CASE(CREATE_RQ); + MLX5_COMMAND_STR_CASE(MODIFY_RQ); + MLX5_COMMAND_STR_CASE(DESTROY_RQ); + MLX5_COMMAND_STR_CASE(QUERY_RQ); + MLX5_COMMAND_STR_CASE(CREATE_RMP); + MLX5_COMMAND_STR_CASE(MODIFY_RMP); + MLX5_COMMAND_STR_CASE(DESTROY_RMP); + MLX5_COMMAND_STR_CASE(QUERY_RMP); + MLX5_COMMAND_STR_CASE(CREATE_TIS); + MLX5_COMMAND_STR_CASE(MODIFY_TIS); + MLX5_COMMAND_STR_CASE(DESTROY_TIS); + MLX5_COMMAND_STR_CASE(QUERY_TIS); + MLX5_COMMAND_STR_CASE(CREATE_RQT); + MLX5_COMMAND_STR_CASE(MODIFY_RQT); + MLX5_COMMAND_STR_CASE(DESTROY_RQT); + MLX5_COMMAND_STR_CASE(QUERY_RQT); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); + MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP); + MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS); + MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP); + MLX5_COMMAND_STR_CASE(CREATE_XRQ); + MLX5_COMMAND_STR_CASE(DESTROY_XRQ); + MLX5_COMMAND_STR_CASE(QUERY_XRQ); + MLX5_COMMAND_STR_CASE(ARM_XRQ); + MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); + MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); + MLX5_COMMAND_STR_CASE(CREATE_UCTX); + MLX5_COMMAND_STR_CASE(DESTROY_UCTX); + MLX5_COMMAND_STR_CASE(CREATE_UMEM); + MLX5_COMMAND_STR_CASE(DESTROY_UMEM); + MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR); + MLX5_COMMAND_STR_CASE(MODIFY_XRQ); + MLX5_COMMAND_STR_CASE(QUERY_VHCA_STATE); + MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE); + MLX5_COMMAND_STR_CASE(ALLOC_SF); + MLX5_COMMAND_STR_CASE(DEALLOC_SF); + MLX5_COMMAND_STR_CASE(SUSPEND_VHCA); + MLX5_COMMAND_STR_CASE(RESUME_VHCA); + MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE); + MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE); + MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE); + MLX5_COMMAND_STR_CASE(SYNC_CRYPTO); + default: return "unknown command opcode"; + } +} + +static const char *cmd_status_str(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: + return "OK"; + case MLX5_CMD_STAT_INT_ERR: + return "internal error"; + case MLX5_CMD_STAT_BAD_OP_ERR: + return "bad operation"; + case MLX5_CMD_STAT_BAD_PARAM_ERR: + return "bad parameter"; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: + return "bad system state"; + case MLX5_CMD_STAT_BAD_RES_ERR: + return "bad resource"; + case MLX5_CMD_STAT_RES_BUSY: + return "resource busy"; + case MLX5_CMD_STAT_LIM_ERR: + return "limits exceeded"; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: + return "bad resource state"; + case MLX5_CMD_STAT_IX_ERR: + return "bad index"; + case MLX5_CMD_STAT_NO_RES_ERR: + return "no resources"; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: + return "bad input length"; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: + return "bad output length"; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: + return "bad QP state"; + case MLX5_CMD_STAT_BAD_PKT_ERR: + return "bad packet (discarded)"; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: + return "bad size too many outstanding CQEs"; + default: + return "unknown status"; + } +} + +static int cmd_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: return 0; + case MLX5_CMD_STAT_INT_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; + case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; + case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_IX_ERR: return -EINVAL; + case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; + default: return -EIO; + } +} + +void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + mlx5_core_err_rl(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", + mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, cmd_status_to_err(status)); +} +EXPORT_SYMBOL(mlx5_cmd_out_err); + +static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) +{ + u16 opcode, op_mod; + u16 uid; + + opcode = in_to_opcode(in); + op_mod = MLX5_GET(mbox_in, in, op_mod); + uid = MLX5_GET(mbox_in, in, uid); + + if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY && + opcode != MLX5_CMD_OP_CREATE_UCTX) + mlx5_cmd_out_err(dev, opcode, op_mod, out); +} + +int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) +{ + /* aborted due to PCI error or via reset flow mlx5_cmd_trigger_completions() */ + if (err == -ENXIO) { + u16 opcode = in_to_opcode(in); + u32 syndrome; + u8 status; + + /* PCI Error, emulate command return status, for smooth reset */ + err = mlx5_internal_err_ret_value(dev, opcode, &syndrome, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, syndrome); + if (!err) + return 0; + } + + /* driver or FW delivery error */ + if (err != -EREMOTEIO && err) + return err; + + /* check outbox status */ + err = cmd_status_to_err(MLX5_GET(mbox_out, out, status)); + if (err) + cmd_status_print(dev, in, out); + + return err; +} +EXPORT_SYMBOL(mlx5_cmd_check); + +static void dump_command(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent, int input) +{ + struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; + struct mlx5_cmd_mailbox *next = msg->next; + int n = mlx5_calc_cmd_blocks(msg); + u16 op = ent->op; + int data_only; + u32 offset = 0; + int dump_len; + int i; + + mlx5_core_dbg(dev, "cmd[%d]: start dump\n", ent->idx); + data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA)); + + if (data_only) + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA, + "cmd[%d]: dump command data %s(0x%x) %s\n", + ent->idx, mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + else + mlx5_core_dbg(dev, "cmd[%d]: dump command %s(0x%x) %s\n", + ent->idx, mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + + if (data_only) { + if (input) { + dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset, ent->idx); + offset += sizeof(ent->lay->in); + } else { + dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset, ent->idx); + offset += sizeof(ent->lay->out); + } + } else { + dump_buf(ent->lay, sizeof(*ent->lay), 0, offset, ent->idx); + offset += sizeof(*ent->lay); + } + + for (i = 0; i < n && next; i++) { + if (data_only) { + dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset); + dump_buf(next->buf, dump_len, 1, offset, ent->idx); + offset += MLX5_CMD_DATA_BLOCK_SIZE; + } else { + mlx5_core_dbg(dev, "cmd[%d]: command block:\n", ent->idx); + dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset, + ent->idx); + offset += sizeof(struct mlx5_cmd_prot_block); + } + next = next->next; + } + + if (data_only) + pr_debug("\n"); + + mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx); +} + +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); + +static void cb_timeout_handler(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct mlx5_cmd_work_ent *ent = container_of(dwork, + struct mlx5_cmd_work_ent, + cb_timeout_work); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, + cmd); + + mlx5_cmd_eq_recover(dev); + + /* Maybe got handled by eq recover ? */ + if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx, + mlx5_command_str(ent->op), ent->op); + goto out; /* phew, already handled */ + } + + ent->ret = -ETIMEDOUT; + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", + ent->idx, mlx5_command_str(ent->op), ent->op); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); + +out: + cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ +} + +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg); + +static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) +{ + if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL) + return true; + + return cmd->allowed_opcode == opcode; +} + +bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) +{ + return pci_channel_offline(dev->pdev) || + dev->cmd.state != MLX5_CMDIF_STATE_UP || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + +static void cmd_work_handler(struct work_struct *work) +{ + struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); + struct mlx5_cmd *cmd = ent->cmd; + bool poll_cmd = ent->polling; + struct mlx5_cmd_layout *lay; + struct mlx5_core_dev *dev; + unsigned long cb_timeout; + struct semaphore *sem; + unsigned long flags; + int alloc_ret; + int cmd_mode; + + dev = container_of(cmd, struct mlx5_core_dev, cmd); + cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); + + complete(&ent->handling); + sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem; + down(sem); + if (!ent->page_queue) { + alloc_ret = cmd_alloc_index(cmd, ent); + if (alloc_ret < 0) { + mlx5_core_err_rl(dev, "failed to allocate command entry\n"); + if (ent->callback) { + ent->callback(-EAGAIN, ent->context); + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + cmd_ent_put(ent); + } else { + ent->ret = -EAGAIN; + complete(&ent->done); + } + up(sem); + return; + } + } else { + ent->idx = cmd->vars.max_reg_cmds; + spin_lock_irqsave(&cmd->alloc_lock, flags); + clear_bit(ent->idx, &cmd->vars.bitmask); + cmd->ent_arr[ent->idx] = ent; + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + } + + lay = get_inst(cmd, ent->idx); + ent->lay = lay; + memset(lay, 0, sizeof(*lay)); + memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); + if (ent->in->next) + lay->in_ptr = cpu_to_be64(ent->in->next->dma); + lay->inlen = cpu_to_be32(ent->in->len); + if (ent->out->next) + lay->out_ptr = cpu_to_be64(ent->out->next->dma); + lay->outlen = cpu_to_be32(ent->out->len); + lay->type = MLX5_PCI_CMD_XPORT; + lay->token = ent->token; + lay->status_own = CMD_OWNER_HW; + set_signature(ent, !cmd->checksum_disabled); + dump_command(dev, ent, 1); + ent->ts1 = ktime_get_ns(); + cmd_mode = cmd->mode; + + if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout)) + cmd_ent_get(ent); + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + + cmd_ent_get(ent); /* for the _real_ FW event on completion */ + /* Skip sending command to fw if internal error */ + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { + ent->ret = -ENXIO; + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); + return; + } + + /* ring doorbell after the descriptor is valid */ + mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); + wmb(); + iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); + /* if not in polling don't use ent after this point */ + if (cmd_mode == CMD_MODE_POLLING || poll_cmd) { + poll_timeout(ent); + /* make sure we read the descriptor after ownership is SW */ + rmb(); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT)); + } +} + +static int deliv_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + case MLX5_DRIVER_STATUS_ABORTED: + return 0; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return -EBADR; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return -EFAULT; /* Bad address */ + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return -ENOMSG; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return -EIO; + default: + return -EINVAL; + } +} + +static const char *deliv_status_to_str(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + return "no errors"; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + return "signature error"; + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return "token error"; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + return "bad block number"; + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + return "output pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return "input pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return "firmware internal error"; + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + return "command input length error"; + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + return "command output length error"; + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return "reserved fields not cleared"; + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + return "bad command descriptor type"; + default: + return "unknown status code"; + } +} + +enum { + MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000, +}; + +static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC); + + mlx5_cmd_eq_recover(dev); + + /* Re-wait on the ent->done after executing the recovery flow. If the + * recovery flow (or any other recovery flow running simultaneously) + * has recovered an EQE, it should cause the entry to be completed by + * the command interface. + */ + if (wait_for_completion_timeout(&ent->done, timeout)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx, + mlx5_command_str(ent->op), ent->op); + return; + } + + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx, + mlx5_command_str(ent->op), ent->op); + + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); +} + +static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); + struct mlx5_cmd *cmd = &dev->cmd; + int err; + + if (!wait_for_completion_timeout(&ent->handling, timeout) && + cancel_work_sync(&ent->work)) { + ent->ret = -ECANCELED; + goto out_err; + } + if (cmd->mode == CMD_MODE_POLLING || ent->polling) + wait_for_completion(&ent->done); + else if (!wait_for_completion_timeout(&ent->done, timeout)) + wait_func_handle_exec_timeout(dev, ent); + +out_err: + err = ent->ret; + + if (err == -ETIMEDOUT) { + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(ent->op), ent->op); + } else if (err == -ECANCELED) { + mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", + mlx5_command_str(ent->op), ent->op); + } + mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", + err, deliv_status_to_str(ent->status), ent->status); + + return err; +} + +/* Notes: + * 1. Callback functions may not sleep + * 2. page queue commands do not support asynchrous completion + * + * return value in case (!callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret > 0 : Command execution couldn't be performed by firmware + * ret == 0: Command was executed by FW, Caller must check FW outbox status. + * + * return value in case (callback): + * ret < 0 : Command execution couldn't be submitted by driver + * ret == 0: Command will be submitted to FW for execution + * and the callback will be called for further status updates + */ +static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t callback, + void *context, int page_queue, + u8 token, bool force_polling) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + struct mlx5_cmd_stats *stats; + u8 status = 0; + int err = 0; + s64 ds; + + if (callback && page_queue) + return -EINVAL; + + ent = cmd_alloc_ent(cmd, in, out, uout, uout_size, + callback, context, page_queue); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + /* put for this ent is when consumed, depending on the use case + * 1) (!callback) blocking flow: by caller after wait_func completes + * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled + */ + + ent->token = token; + ent->polling = force_polling; + + init_completion(&ent->handling); + if (!callback) + init_completion(&ent->done); + + INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler); + INIT_WORK(&ent->work, cmd_work_handler); + if (page_queue) { + cmd_work_handler(&ent->work); + } else if (!queue_work(cmd->wq, &ent->work)) { + mlx5_core_warn(dev, "failed to queue work\n"); + err = -EALREADY; + goto out_free; + } + + if (callback) + return 0; /* mlx5_cmd_comp_handler() will put(ent) */ + + err = wait_func(dev, ent); + if (err == -ETIMEDOUT || err == -ECANCELED) + goto out_free; + + ds = ent->ts2 - ent->ts1; + stats = xa_load(&cmd->stats, ent->op); + if (stats) { + spin_lock_irq(&stats->lock); + stats->sum += ds; + ++stats->n; + spin_unlock_irq(&stats->lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(ent->op), ds); + +out_free: + status = ent->status; + cmd_ent_put(ent); + return err ? : status; +} + +static ssize_t dbg_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char lbuf[3]; + int err; + + if (!dbg->in_msg || !dbg->out_msg) + return -ENOMEM; + + if (count < sizeof(lbuf) - 1) + return -EINVAL; + + if (copy_from_user(lbuf, buf, sizeof(lbuf) - 1)) + return -EFAULT; + + lbuf[sizeof(lbuf) - 1] = 0; + + if (strcmp(lbuf, "go")) + return -EINVAL; + + err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen); + + return err ? err : count; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = dbg_write, +}; + +static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size, + u8 token) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(to->first.data)); + memcpy(to->first.data, from, copy); + size -= copy; + from += copy; + + next = to->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + memcpy(block->data, from, copy); + from += copy; + size -= copy; + block->token = token; + next = next->next; + } + + return 0; +} + +static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(from->first.data)); + memcpy(to, from->first.data, copy); + size -= copy; + to += copy; + + next = from->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + + memcpy(to, block->data, copy); + to += copy; + size -= copy; + next = next->next; + } + + return 0; +} + +static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, + gfp_t flags) +{ + struct mlx5_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof(*mailbox), flags); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = dma_pool_zalloc(dev->cmd.pool, flags, + &mailbox->dma); + if (!mailbox->buf) { + mlx5_core_dbg(dev, "failed allocation\n"); + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + mailbox->next = NULL; + + return mailbox; +} + +static void free_cmd_box(struct mlx5_core_dev *dev, + struct mlx5_cmd_mailbox *mailbox) +{ + dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + +static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, + gfp_t flags, int size, + u8 token) +{ + struct mlx5_cmd_mailbox *tmp, *head = NULL; + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_msg *msg; + int err; + int n; + int i; + + msg = kzalloc(sizeof(*msg), flags); + if (!msg) + return ERR_PTR(-ENOMEM); + + msg->len = size; + n = mlx5_calc_cmd_blocks(msg); + + for (i = 0; i < n; i++) { + tmp = alloc_cmd_box(dev, flags); + if (IS_ERR(tmp)) { + mlx5_core_warn(dev, "failed allocating block\n"); + err = PTR_ERR(tmp); + goto err_alloc; + } + + block = tmp->buf; + tmp->next = head; + block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0); + block->block_num = cpu_to_be32(n - i - 1); + block->token = token; + head = tmp; + } + msg->next = head; + return msg; + +err_alloc: + while (head) { + tmp = head->next; + free_cmd_box(dev, head); + head = tmp; + } + kfree(msg); + + return ERR_PTR(err); +} + +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg) +{ + struct mlx5_cmd_mailbox *head = msg->next; + struct mlx5_cmd_mailbox *next; + + while (head) { + next = head->next; + free_cmd_box(dev, head); + head = next; + } + kfree(msg); +} + +static ssize_t data_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + void *ptr; + + if (*pos != 0) + return -EINVAL; + + kfree(dbg->in_msg); + dbg->in_msg = NULL; + dbg->inlen = 0; + ptr = memdup_user(buf, count); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + dbg->in_msg = ptr; + dbg->inlen = count; + + *pos = count; + + return count; +} + +static ssize_t data_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + if (!dbg->out_msg) + return -ENOMEM; + + return simple_read_from_buffer(buf, count, pos, dbg->out_msg, + dbg->outlen); +} + +static const struct file_operations dfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = data_write, + .read = data_read, +}; + +static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen[8]; + int err; + + err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen); + if (err < 0) + return err; + + return simple_read_from_buffer(buf, count, pos, outlen, err); +} + +static ssize_t outlen_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen_str[8] = {0}; + int outlen; + void *ptr; + int err; + + if (*pos != 0 || count > 6) + return -EINVAL; + + kfree(dbg->out_msg); + dbg->out_msg = NULL; + dbg->outlen = 0; + + if (copy_from_user(outlen_str, buf, count)) + return -EFAULT; + + err = sscanf(outlen_str, "%d", &outlen); + if (err != 1) + return -EINVAL; + + ptr = kzalloc(outlen, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + dbg->out_msg = ptr; + dbg->outlen = outlen; + + *pos = count; + + return count; +} + +static const struct file_operations olfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = outlen_write, + .read = outlen_read, +}; + +static void set_wqname(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s", + dev_name(dev->device)); +} + +static void clean_debug_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dbg->dbg_root); +} + +static void create_debugfs_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + dbg->dbg_root = debugfs_create_dir("cmd", mlx5_debugfs_get_dev_root(dev)); + + debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops); + debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status); + debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); +} + +void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + down(&cmd->vars.sem); + down(&cmd->vars.pages_sem); + + cmd->allowed_opcode = opcode; + + up(&cmd->vars.pages_sem); + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + up(&cmd->vars.sem); +} + +static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + down(&cmd->vars.sem); + down(&cmd->vars.pages_sem); + + cmd->mode = mode; + + up(&cmd->vars.pages_sem); + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + up(&cmd->vars.sem); +} + +static int cmd_comp_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_core_dev *dev; + struct mlx5_cmd *cmd; + struct mlx5_eqe *eqe; + + cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb); + dev = container_of(cmd, struct mlx5_core_dev, cmd); + eqe = data; + + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + + return NOTIFY_OK; +} +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +{ + MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD); + mlx5_eq_notifier_register(dev, &dev->cmd.nb); + mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); +} + +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); + mlx5_eq_notifier_unregister(dev, &dev->cmd.nb); +} + +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) +{ + unsigned long flags; + + if (msg->parent) { + spin_lock_irqsave(&msg->parent->lock, flags); + list_add_tail(&msg->list, &msg->parent->head); + spin_unlock_irqrestore(&msg->parent->lock, flags); + } else { + mlx5_free_cmd_msg(dev, msg); + } +} + +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + mlx5_cmd_cbk_t callback; + void *context; + int err; + int i; + s64 ds; + struct mlx5_cmd_stats *stats; + unsigned long flags; + unsigned long vector; + + /* there can be at most 32 command queues */ + vector = vec & 0xffffffff; + for (i = 0; i < (1 << cmd->vars.log_sz); i++) { + if (test_bit(i, &vector)) { + ent = cmd->ent_arr[i]; + + /* if we already completed the command, ignore it */ + if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, + &ent->state)) { + /* only real completion can free the cmd slot */ + if (!forced) { + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + cmd_ent_put(ent); + } + continue; + } + + if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work)) + cmd_ent_put(ent); /* timeout work was canceled */ + + if (!forced || /* Real FW completion */ + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ + !opcode_allowed(cmd, ent->op)) + cmd_ent_put(ent); + + ent->ts2 = ktime_get_ns(); + memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); + dump_command(dev, ent, 0); + + if (vec & MLX5_TRIGGERED_CMD_COMP) + ent->ret = -ENXIO; + + if (!ent->ret) { /* Command completed by FW */ + if (!cmd->checksum_disabled) + ent->ret = verify_signature(ent); + + ent->status = ent->lay->status_own >> 1; + + mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", + ent->ret, deliv_status_to_str(ent->status), ent->status); + } + + if (ent->callback) { + ds = ent->ts2 - ent->ts1; + stats = xa_load(&cmd->stats, ent->op); + if (stats) { + spin_lock_irqsave(&stats->lock, flags); + stats->sum += ds; + ++stats->n; + spin_unlock_irqrestore(&stats->lock, flags); + } + + callback = ent->callback; + context = ent->context; + err = ent->ret ? : ent->status; + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + + if (!err) + err = mlx5_copy_from_msg(ent->uout, + ent->out, + ent->uout_size); + + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + + /* final consumer is done, release ent */ + cmd_ent_put(ent); + callback(err, context); + } else { + /* release wait_func() so mlx5_cmd_invoke() + * can make the final ent_put() + */ + complete(&ent->done); + } + } + } +} + +static void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + unsigned long bitmask; + unsigned long flags; + u64 vector; + int i; + + /* wait for pending handlers to complete */ + mlx5_eq_synchronize_cmd_irq(dev); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.vars.bitmask & ((1ul << (1 << dev->cmd.vars.log_sz)) - 1); + if (!vector) + goto no_trig; + + bitmask = vector; + /* we must increment the allocated entries refcount before triggering the completions + * to guarantee pending commands will not get freed in the meanwhile. + * For that reason, it also has to be done inside the alloc_lock. + */ + for_each_set_bit(i, &bitmask, (1 << cmd->vars.log_sz)) + cmd_ent_get(cmd->ent_arr[i]); + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector, true); + for_each_set_bit(i, &bitmask, (1 << cmd->vars.log_sz)) + cmd_ent_put(cmd->ent_arr[i]); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} + +void mlx5_cmd_flush(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->vars.max_reg_cmds; i++) { + while (down_trylock(&cmd->vars.sem)) { + mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + } + + while (down_trylock(&cmd->vars.pages_sem)) { + mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + + /* Unlock cmdif */ + up(&cmd->vars.pages_sem); + for (i = 0; i < cmd->vars.max_reg_cmds; i++) + up(&cmd->vars.sem); +} + +static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, + gfp_t gfp) +{ + struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM); + struct cmd_msg_cache *ch = NULL; + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + if (in_size <= 16) + goto cache_miss; + + for (i = 0; i < dev->profile.num_cmd_caches; i++) { + ch = &cmd->cache[i]; + if (in_size > ch->max_inbox_size) + continue; + spin_lock_irq(&ch->lock); + if (list_empty(&ch->head)) { + spin_unlock_irq(&ch->lock); + continue; + } + msg = list_entry(ch->head.next, typeof(*msg), list); + /* For cached lists, we must explicitly state what is + * the real size + */ + msg->len = in_size; + list_del(&msg->list); + spin_unlock_irq(&ch->lock); + break; + } + + if (!IS_ERR(msg)) + return msg; + +cache_miss: + msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0); + return msg; +} + +static int is_manage_pages(void *in) +{ + return in_to_opcode(in) == MLX5_CMD_OP_MANAGE_PAGES; +} + +/* Notes: + * 1. Callback functions may not sleep + * 2. Page queue commands do not support asynchrous completion + */ +static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size, mlx5_cmd_cbk_t callback, void *context, + bool force_polling) +{ + struct mlx5_cmd_msg *inb, *outb; + u16 opcode = in_to_opcode(in); + bool throttle_op; + int pages_queue; + gfp_t gfp; + u8 token; + int err; + + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) + return -ENXIO; + + throttle_op = mlx5_cmd_is_throttle_opcode(opcode); + if (throttle_op) { + /* atomic context may not sleep */ + if (callback) + return -EINVAL; + down(&dev->cmd.vars.throttle_sem); + } + + pages_queue = is_manage_pages(in); + gfp = callback ? GFP_ATOMIC : GFP_KERNEL; + + inb = alloc_msg(dev, in_size, gfp); + if (IS_ERR(inb)) { + err = PTR_ERR(inb); + goto out_up; + } + + token = alloc_token(&dev->cmd); + + err = mlx5_copy_to_msg(inb, in, in_size, token); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + goto out_in; + } + + outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token); + if (IS_ERR(outb)) { + err = PTR_ERR(outb); + goto out_in; + } + + err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, + pages_queue, token, force_polling); + if (callback) + return err; + + if (err > 0) /* Failed in FW, command didn't execute */ + err = deliv_status_to_err(err); + + if (err) + goto out_out; + + /* command completed by FW */ + err = mlx5_copy_from_msg(out, outb, out_size); +out_out: + mlx5_free_cmd_msg(dev, outb); +out_in: + free_msg(dev, inb); +out_up: + if (throttle_op) + up(&dev->cmd.vars.throttle_sem); + return err; +} + +static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, + cmd_status_to_err(status)); +} + +static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, + u32 syndrome, int err) +{ + const char *namep = mlx5_command_str(opcode); + struct mlx5_cmd_stats *stats; + unsigned long flags; + + if (!err || !(strcmp(namep, "unknown command opcode"))) + return; + + stats = xa_load(&dev->cmd.stats, opcode); + if (!stats) + return; + spin_lock_irqsave(&stats->lock, flags); + stats->failed++; + if (err < 0) + stats->last_failed_errno = -err; + if (err == -EREMOTEIO) { + stats->failed_mbox_status++; + stats->last_failed_mbox_status = status; + stats->last_failed_syndrome = syndrome; + } + spin_unlock_irqrestore(&stats->lock, flags); +} + +/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ + err = -EIO; + + if (!err && status != MLX5_CMD_STAT_OK) { + err = -EREMOTEIO; + mlx5_cmd_err_trace(dev, opcode, op_mod, out); + } + + cmd_status_log(dev, opcode, status, syndrome, err); + return err; +} + +/** + * mlx5_cmd_do - Executes a fw command, wait for completion. + * Unlike mlx5_cmd_exec, this function will not translate or intercept + * outbox.status and will return -EREMOTEIO when + * outbox.status != MLX5_CMD_STAT_OK + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: + * -EREMOTEIO : Command executed by FW, outbox.status != MLX5_CMD_STAT_OK. + * Caller must check FW outbox status. + * 0 : Command execution successful, outbox.status == MLX5_CMD_STAT_OK. + * < 0 : Command execution couldn't be performed by firmware or driver + */ +int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); + u16 opcode = in_to_opcode(in); + + return cmd_status_err(dev, err, opcode, op_mod, out); +} +EXPORT_SYMBOL(mlx5_cmd_do); + +/** + * mlx5_cmd_exec - Executes a fw command, wait for completion + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size) +{ + int err = mlx5_cmd_do(dev, in, in_size, out, out_size); + + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec); + +/** + * mlx5_cmd_exec_polling - Executes a fw command, poll for completion + * Needed for driver force teardown, when command completion EQ + * will not be available to complete the command + * + * @dev: mlx5 core device + * @in: inbox mlx5_ifc command buffer + * @in_size: inbox buffer size + * @out: outbox mlx5_ifc buffer + * @out_size: outbox size + * + * @return: 0 if no error, FW command execution was successful + * and outbox status is ok. + */ +int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, + void *out, int out_size) +{ + int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); + u16 opcode = in_to_opcode(in); + + err = cmd_status_err(dev, err, opcode, op_mod, out); + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_cmd_exec_polling); + +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) +{ + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_completion(&ctx->inflight_done); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). + */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + if (!atomic_dec_and_test(&ctx->num_inflight)) + wait_for_completion(&ctx->inflight_done); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx; + + ctx = work->ctx; + status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + complete(&ctx->inflight_done); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + work->opcode = in_to_opcode(in); + work->op_mod = MLX5_GET(mbox_in, in, op_mod); + work->out = out; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + complete(&ctx->inflight_done); + + return ret; +} +EXPORT_SYMBOL(mlx5_cmd_exec_cb); + +static void destroy_msg_cache(struct mlx5_core_dev *dev) +{ + struct cmd_msg_cache *ch; + struct mlx5_cmd_msg *msg; + struct mlx5_cmd_msg *n; + int i; + + for (i = 0; i < dev->profile.num_cmd_caches; i++) { + ch = &dev->cmd.cache[i]; + list_for_each_entry_safe(msg, n, &ch->head, list) { + list_del(&msg->list); + mlx5_free_cmd_msg(dev, msg); + } + } +} + +static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = { + 512, 32, 16, 8, 2 +}; + +static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = { + 16 + MLX5_CMD_DATA_BLOCK_SIZE, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 2, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 16, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 256, + 16 + MLX5_CMD_DATA_BLOCK_SIZE * 512, +}; + +static void create_msg_cache(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct cmd_msg_cache *ch; + struct mlx5_cmd_msg *msg; + int i; + int k; + + /* Initialize and fill the caches with initial entries */ + for (k = 0; k < dev->profile.num_cmd_caches; k++) { + ch = &cmd->cache[k]; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->head); + ch->num_ent = cmd_cache_num_ent[k]; + ch->max_inbox_size = cmd_cache_ent_size[k]; + for (i = 0; i < ch->num_ent; i++) { + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN, + ch->max_inbox_size, 0); + if (IS_ERR(msg)) + break; + msg->parent = ch; + list_add_tail(&msg->list, &ch->head); + } + } +} + +static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) +{ + cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, + &cmd->alloc_dma, GFP_KERNEL); + if (!cmd->cmd_alloc_buf) + return -ENOMEM; + + /* make sure it is aligned to 4K */ + if (!((uintptr_t)cmd->cmd_alloc_buf & (MLX5_ADAPTER_PAGE_SIZE - 1))) { + cmd->cmd_buf = cmd->cmd_alloc_buf; + cmd->dma = cmd->alloc_dma; + cmd->alloc_size = MLX5_ADAPTER_PAGE_SIZE; + return 0; + } + + dma_free_coherent(mlx5_core_dma_dev(dev), MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, + cmd->alloc_dma); + cmd->cmd_alloc_buf = dma_alloc_coherent(mlx5_core_dma_dev(dev), + 2 * MLX5_ADAPTER_PAGE_SIZE - 1, + &cmd->alloc_dma, GFP_KERNEL); + if (!cmd->cmd_alloc_buf) + return -ENOMEM; + + cmd->cmd_buf = PTR_ALIGN(cmd->cmd_alloc_buf, MLX5_ADAPTER_PAGE_SIZE); + cmd->dma = ALIGN(cmd->alloc_dma, MLX5_ADAPTER_PAGE_SIZE); + cmd->alloc_size = 2 * MLX5_ADAPTER_PAGE_SIZE - 1; + return 0; +} + +static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) +{ + dma_free_coherent(mlx5_core_dma_dev(dev), cmd->alloc_size, cmd->cmd_alloc_buf, + cmd->alloc_dma); +} + +static u16 cmdif_rev(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; +} + +int mlx5_cmd_init(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + cmd->checksum_disabled = 1; + + spin_lock_init(&cmd->alloc_lock); + spin_lock_init(&cmd->token_lock); + + set_wqname(dev); + cmd->wq = create_singlethread_workqueue(cmd->wq_name); + if (!cmd->wq) { + mlx5_core_err(dev, "failed to create command workqueue\n"); + return -ENOMEM; + } + + mlx5_cmdif_debugfs_init(dev); + + return 0; +} + +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + mlx5_cmdif_debugfs_cleanup(dev); + destroy_workqueue(cmd->wq); +} + +int mlx5_cmd_enable(struct mlx5_core_dev *dev) +{ + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); + struct mlx5_cmd *cmd = &dev->cmd; + u32 cmd_h, cmd_l; + int err; + + memset(&cmd->vars, 0, sizeof(cmd->vars)); + cmd->vars.cmdif_rev = cmdif_rev(dev); + if (cmd->vars.cmdif_rev != CMD_IF_REV) { + mlx5_core_err(dev, + "Driver cmdif rev(%d) differs from firmware's(%d)\n", + CMD_IF_REV, cmd->vars.cmdif_rev); + return -EINVAL; + } + + cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff; + cmd->vars.log_sz = cmd_l >> 4 & 0xf; + cmd->vars.log_stride = cmd_l & 0xf; + if (1 << cmd->vars.log_sz > MLX5_MAX_COMMANDS) { + mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n", + 1 << cmd->vars.log_sz); + return -EINVAL; + } + + if (cmd->vars.log_sz + cmd->vars.log_stride > MLX5_ADAPTER_PAGE_SHIFT) { + mlx5_core_err(dev, "command queue size overflow\n"); + return -EINVAL; + } + + cmd->state = MLX5_CMDIF_STATE_DOWN; + cmd->vars.max_reg_cmds = (1 << cmd->vars.log_sz) - 1; + cmd->vars.bitmask = (1UL << cmd->vars.max_reg_cmds) - 1; + + sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds); + sema_init(&cmd->vars.pages_sem, 1); + sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + + cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); + if (!cmd->pool) + return -ENOMEM; + + err = alloc_cmd_page(dev, cmd); + if (err) + goto err_free_pool; + + cmd_h = (u32)((u64)(cmd->dma) >> 32); + cmd_l = (u32)(cmd->dma); + if (cmd_l & 0xfff) { + mlx5_core_err(dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_cmd_page; + } + + iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); + iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); + + /* Make sure firmware sees the complete address before we proceed */ + wmb(); + + mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); + + cmd->mode = CMD_MODE_POLLING; + cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; + + create_msg_cache(dev); + create_debugfs_files(dev); + + return 0; + +err_cmd_page: + free_cmd_page(dev, cmd); +err_free_pool: + dma_pool_destroy(cmd->pool); + return err; +} + +void mlx5_cmd_disable(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + flush_workqueue(cmd->wq); + clean_debug_files(dev); + destroy_msg_cache(dev); + free_cmd_page(dev, cmd); + dma_pool_destroy(cmd->pool); +} + +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state) +{ + dev->cmd.state = cmdif_state; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c new file mode 100644 index 0000000000..4caa1b6f40 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "lib/eq.h" + +#define TASKLET_MAX_TIME 2 +#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) + +void mlx5_cq_tasklet_cb(struct tasklet_struct *t) +{ + unsigned long flags; + unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; + struct mlx5_eq_tasklet *ctx = from_tasklet(ctx, t, task); + struct mlx5_core_cq *mcq; + struct mlx5_core_cq *temp; + + spin_lock_irqsave(&ctx->lock, flags); + list_splice_tail_init(&ctx->list, &ctx->process_list); + spin_unlock_irqrestore(&ctx->lock, flags); + + list_for_each_entry_safe(mcq, temp, &ctx->process_list, + tasklet_ctx.list) { + list_del_init(&mcq->tasklet_ctx.list); + mcq->tasklet_ctx.comp(mcq, NULL); + mlx5_cq_put(mcq); + if (time_after(jiffies, end)) + break; + } + + if (!list_empty(&ctx->process_list)) + tasklet_schedule(&ctx->task); +} + +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) +{ + unsigned long flags; + struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + + spin_lock_irqsave(&tasklet_ctx->lock, flags); + /* When migrating CQs between EQs will be implemented, please note + * that you need to sync this point. It is possible that + * while migrating a CQ, completions on the old EQs could + * still arrive. + */ + if (list_empty_careful(&cq->tasklet_ctx.list)) { + mlx5_cq_hold(cq); + list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + } + spin_unlock_irqrestore(&tasklet_ctx->lock, flags); +} + +/* Callers must verify outbox status in case of err */ +int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) +{ + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + c_eqn_or_apu_element); + u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {}; + struct mlx5_eq_comp *eq; + int err; + + eq = mlx5_eqn2comp_eq(dev, eqn); + if (IS_ERR(eq)) + return PTR_ERR(eq); + + memset(out, 0, outlen); + MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); + err = mlx5_cmd_do(dev, in, inlen, out, outlen); + if (err) + return err; + + cq->cqn = MLX5_GET(create_cq_out, out, cqn); + cq->cons_index = 0; + cq->arm_sn = 0; + cq->eq = eq; + cq->uid = MLX5_GET(create_cq_in, in, uid); + refcount_set(&cq->refcount, 1); + init_completion(&cq->free); + if (!cq->comp) + cq->comp = mlx5_add_cq_to_tasklet; + /* assuming CQ will be deleted before the EQ */ + cq->tasklet_ctx.priv = &eq->tasklet_ctx; + INIT_LIST_HEAD(&cq->tasklet_ctx.list); + + /* Add to comp EQ CQ tree to recv comp events */ + err = mlx5_eq_add_cq(&eq->core, cq); + if (err) + goto err_cmd; + + /* Add to async EQ CQ tree to recv async events */ + err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq); + if (err) + goto err_cq_add; + + cq->pid = current->pid; + err = mlx5_debug_cq_add(dev, cq); + if (err) + mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n", + cq->cqn); + + cq->uar = dev->priv.uar; + cq->irqn = eq->core.irqn; + + return 0; + +err_cq_add: + mlx5_eq_del_cq(&eq->core, cq); +err_cmd: + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, din, uid, cq->uid); + mlx5_cmd_exec_in(dev, destroy_cq, din); + return err; +} +EXPORT_SYMBOL(mlx5_create_cq); + +/* oubox is checked and err val is normalized */ +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen, u32 *out, int outlen) +{ + int err = mlx5_create_cq(dev, cq, in, inlen, out, outlen); + + return mlx5_cmd_check(dev, err, in, out); +} +EXPORT_SYMBOL(mlx5_core_create_cq); + +int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {}; + int err; + + mlx5_debug_cq_remove(dev, cq); + + mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); + mlx5_eq_del_cq(&cq->eq->core, cq); + + MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, in, uid, cq->uid); + err = mlx5_cmd_exec_in(dev, destroy_cq, in); + if (err) + return err; + + synchronize_irq(cq->irqn); + mlx5_cq_put(cq); + wait_for_completion(&cq->free); + + return 0; +} +EXPORT_SYMBOL(mlx5_core_destroy_cq); + +int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {}; + + MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ); + MLX5_SET(query_cq_in, in, cqn, cq->cqn); + return mlx5_cmd_exec_inout(dev, query_cq, in, out); +} +EXPORT_SYMBOL(mlx5_core_query_cq); + +int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {}; + + MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + MLX5_SET(modify_cq_in, in, uid, cq->uid); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_core_modify_cq); + +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, + u16 cq_period, + u16 cq_max_count) +{ + u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {}; + void *cqc; + + MLX5_SET(modify_cq_in, in, cqn, cq->cqn); + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, cq_period, cq_period); + MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.modify_field_select.modify_field_select, + MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); + + return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); +} +EXPORT_SYMBOL(mlx5_core_modify_cq_moderation); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c new file mode 100644 index 0000000000..09652dc891 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -0,0 +1,603 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" +#include "lib/eq.h" + +enum { + QP_PID, + QP_STATE, + QP_XPORT, + QP_MTU, + QP_N_RECV, + QP_RECV_SZ, + QP_N_SEND, + QP_LOG_PG_SZ, + QP_RQPN, +}; + +static char *qp_fields[] = { + [QP_PID] = "pid", + [QP_STATE] = "state", + [QP_XPORT] = "transport", + [QP_MTU] = "mtu", + [QP_N_RECV] = "num_recv", + [QP_RECV_SZ] = "rcv_wqe_sz", + [QP_N_SEND] = "num_send", + [QP_LOG_PG_SZ] = "log2_page_sz", + [QP_RQPN] = "remote_qpn", +}; + +enum { + EQ_NUM_EQES, + EQ_INTR, + EQ_LOG_PG_SZ, +}; + +static char *eq_fields[] = { + [EQ_NUM_EQES] = "num_eqes", + [EQ_INTR] = "intr", + [EQ_LOG_PG_SZ] = "log_page_size", +}; + +enum { + CQ_PID, + CQ_NUM_CQES, + CQ_LOG_PG_SZ, +}; + +static char *cq_fields[] = { + [CQ_PID] = "pid", + [CQ_NUM_CQES] = "num_cqes", + [CQ_LOG_PG_SZ] = "log_page_size", +}; + +struct dentry *mlx5_debugfs_root; +EXPORT_SYMBOL(mlx5_debugfs_root); + +void mlx5_register_debugfs(void) +{ + mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL); +} + +void mlx5_unregister_debugfs(void) +{ + debugfs_remove(mlx5_debugfs_root); +} + +struct dentry *mlx5_debugfs_get_dev_root(struct mlx5_core_dev *dev) +{ + return dev->priv.dbg.dbg_root; +} +EXPORT_SYMBOL(mlx5_debugfs_get_dev_root); + +void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) +{ + dev->priv.dbg.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg.dbg_root); +} +EXPORT_SYMBOL(mlx5_qp_debugfs_init); + +void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.qp_debugfs); +} +EXPORT_SYMBOL(mlx5_qp_debugfs_cleanup); + +void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) +{ + dev->priv.dbg.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg.dbg_root); +} + +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.eq_debugfs); +} + +static ssize_t average_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + u64 field = 0; + int ret; + char tbuf[22]; + + stats = filp->private_data; + spin_lock_irq(&stats->lock); + if (stats->n) + field = div64_u64(stats->sum, stats->n); + spin_unlock_irq(&stats->lock); + ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field); + return simple_read_from_buffer(buf, count, pos, tbuf, ret); +} + +static ssize_t average_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + + stats = filp->private_data; + spin_lock_irq(&stats->lock); + stats->sum = 0; + stats->n = 0; + spin_unlock_irq(&stats->lock); + + *pos += count; + + return count; +} + +static const struct file_operations stats_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = average_read, + .write = average_write, +}; + +static ssize_t slots_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cmd *cmd; + char tbuf[6]; + int weight; + int field; + int ret; + + cmd = filp->private_data; + weight = bitmap_weight(&cmd->vars.bitmask, cmd->vars.max_reg_cmds); + field = cmd->vars.max_reg_cmds - weight; + ret = snprintf(tbuf, sizeof(tbuf), "%d\n", field); + return simple_read_from_buffer(buf, count, pos, tbuf, ret); +} + +static const struct file_operations slots_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = slots_read, +}; + +static struct mlx5_cmd_stats * +mlx5_cmdif_alloc_stats(struct xarray *stats_xa, int opcode) +{ + struct mlx5_cmd_stats *stats = kzalloc(sizeof(*stats), GFP_KERNEL); + int err; + + if (!stats) + return NULL; + + err = xa_insert(stats_xa, opcode, stats, GFP_KERNEL); + if (err) { + kfree(stats); + return NULL; + } + spin_lock_init(&stats->lock); + return stats; +} + +void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_stats *stats; + struct dentry **cmd; + const char *namep; + int i; + + cmd = &dev->priv.dbg.cmdif_debugfs; + *cmd = debugfs_create_dir("commands", dev->priv.dbg.dbg_root); + + debugfs_create_file("slots_inuse", 0400, *cmd, &dev->cmd, &slots_fops); + + xa_init(&dev->cmd.stats); + + for (i = 0; i < MLX5_CMD_OP_MAX; i++) { + namep = mlx5_command_str(i); + if (strcmp(namep, "unknown command opcode")) { + stats = mlx5_cmdif_alloc_stats(&dev->cmd.stats, i); + if (!stats) + continue; + stats->root = debugfs_create_dir(namep, *cmd); + + debugfs_create_file("average", 0400, stats->root, stats, + &stats_fops); + debugfs_create_u64("n", 0400, stats->root, &stats->n); + debugfs_create_u64("failed", 0400, stats->root, &stats->failed); + debugfs_create_u64("failed_mbox_status", 0400, stats->root, + &stats->failed_mbox_status); + debugfs_create_u32("last_failed_errno", 0400, stats->root, + &stats->last_failed_errno); + debugfs_create_u8("last_failed_mbox_status", 0400, stats->root, + &stats->last_failed_mbox_status); + debugfs_create_x32("last_failed_syndrome", 0400, stats->root, + &stats->last_failed_syndrome); + } + } +} + +void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_stats *stats; + unsigned long i; + + debugfs_remove_recursive(dev->priv.dbg.cmdif_debugfs); + xa_for_each(&dev->cmd.stats, i, stats) + kfree(stats); + xa_destroy(&dev->cmd.stats); +} + +void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) +{ + dev->priv.dbg.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg.dbg_root); +} + +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.cq_debugfs); +} + +void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) +{ + struct dentry *pages; + + dev->priv.dbg.pages_debugfs = debugfs_create_dir("pages", dev->priv.dbg.dbg_root); + pages = dev->priv.dbg.pages_debugfs; + + debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); + debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_ec_vfs", 0400, pages, &dev->priv.page_counters[MLX5_EC_VF]); + debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]); + debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); + debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); + debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped); + debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages, + &dev->priv.reclaim_pages_discard); +} + +void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + debugfs_remove_recursive(dev->priv.dbg.pages_debugfs); +} + +static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + int index, int *is_str) +{ + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); + u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {}; + u64 param = 0; + u32 *out; + int state; + u32 *qpc; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return 0; + + MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); + MLX5_SET(query_qp_in, in, qpn, qp->qpn); + err = mlx5_cmd_exec_inout(dev, query_qp, in, out); + if (err) + goto out; + + *is_str = 0; + + qpc = MLX5_ADDR_OF(query_qp_out, out, qpc); + switch (index) { + case QP_PID: + param = qp->pid; + break; + case QP_STATE: + state = MLX5_GET(qpc, qpc, state); + param = (unsigned long)mlx5_qp_state_str(state); + *is_str = 1; + break; + case QP_XPORT: + param = (unsigned long)mlx5_qp_type_str(MLX5_GET(qpc, qpc, st)); + *is_str = 1; + break; + case QP_MTU: + switch (MLX5_GET(qpc, qpc, mtu)) { + case IB_MTU_256: + param = 256; + break; + case IB_MTU_512: + param = 512; + break; + case IB_MTU_1024: + param = 1024; + break; + case IB_MTU_2048: + param = 2048; + break; + case IB_MTU_4096: + param = 4096; + break; + default: + param = 0; + } + break; + case QP_N_RECV: + param = 1 << MLX5_GET(qpc, qpc, log_rq_size); + break; + case QP_RECV_SZ: + param = 1 << (MLX5_GET(qpc, qpc, log_rq_stride) + 4); + break; + case QP_N_SEND: + if (!MLX5_GET(qpc, qpc, no_sq)) + param = 1 << MLX5_GET(qpc, qpc, log_sq_size); + break; + case QP_LOG_PG_SZ: + param = MLX5_GET(qpc, qpc, log_page_size) + 12; + break; + case QP_RQPN: + param = MLX5_GET(qpc, qpc, remote_qpn); + break; + } +out: + kfree(out); + return param; +} + +static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int index) +{ + int outlen = MLX5_ST_SZ_BYTES(query_eq_out); + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {}; + u64 param = 0; + void *ctx; + u32 *out; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return param; + + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); + err = mlx5_cmd_exec_inout(dev, query_eq, in, out); + if (err) { + mlx5_core_warn(dev, "failed to query eq\n"); + goto out; + } + ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry); + + switch (index) { + case EQ_NUM_EQES: + param = 1 << MLX5_GET(eqc, ctx, log_eq_size); + break; + case EQ_INTR: + param = MLX5_GET(eqc, ctx, intr); + break; + case EQ_LOG_PG_SZ: + param = MLX5_GET(eqc, ctx, log_page_size) + 12; + break; + } + +out: + kfree(out); + return param; +} + +static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int index) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cq_out); + u64 param = 0; + void *ctx; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return param; + + err = mlx5_core_query_cq(dev, cq, out); + if (err) { + mlx5_core_warn(dev, "failed to query cq\n"); + goto out; + } + ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context); + + switch (index) { + case CQ_PID: + param = cq->pid; + break; + case CQ_NUM_CQES: + param = 1 << MLX5_GET(cqc, ctx, log_cq_size); + break; + case CQ_LOG_PG_SZ: + param = MLX5_GET(cqc, ctx, log_page_size); + break; + } + +out: + kvfree(out); + return param; +} + +static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_field_desc *desc; + struct mlx5_rsc_debug *d; + char tbuf[18]; + int is_str = 0; + u64 field; + int ret; + + desc = filp->private_data; + d = (void *)(desc - desc->i) - sizeof(*d); + switch (d->type) { + case MLX5_DBG_RSC_QP: + field = qp_read_field(d->dev, d->object, desc->i, &is_str); + break; + + case MLX5_DBG_RSC_EQ: + field = eq_read_field(d->dev, d->object, desc->i); + break; + + case MLX5_DBG_RSC_CQ: + field = cq_read_field(d->dev, d->object, desc->i); + break; + + default: + mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type); + return -EINVAL; + } + + if (is_str) + ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)(unsigned long)field); + else + ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); + + return simple_read_from_buffer(buf, count, pos, tbuf, ret); +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = dbg_read, +}; + +static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type, + struct dentry *root, struct mlx5_rsc_debug **dbg, + int rsn, char **field, int nfile, void *data) +{ + struct mlx5_rsc_debug *d; + char resn[32]; + int i; + + d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->dev = dev; + d->object = data; + d->type = type; + sprintf(resn, "0x%x", rsn); + d->root = debugfs_create_dir(resn, root); + + for (i = 0; i < nfile; i++) { + d->fields[i].i = i; + debugfs_create_file(field[i], 0400, d->root, &d->fields[i], + &fops); + } + *dbg = d; + + return 0; +} + +static void rem_res_tree(struct mlx5_rsc_debug *d) +{ + debugfs_remove_recursive(d->root); + kfree(d); +} + +int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.dbg.qp_debugfs, + &qp->dbg, qp->qpn, qp_fields, + ARRAY_SIZE(qp_fields), qp); + if (err) + qp->dbg = NULL; + + return err; +} +EXPORT_SYMBOL(mlx5_debug_qp_add); + +void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + if (!mlx5_debugfs_root || !qp->dbg) + return; + + rem_res_tree(qp->dbg); + qp->dbg = NULL; +} +EXPORT_SYMBOL(mlx5_debug_qp_remove); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.dbg.eq_debugfs, + &eq->dbg, eq->eqn, eq_fields, + ARRAY_SIZE(eq_fields), eq); + if (err) + eq->dbg = NULL; + + return err; +} + +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + if (!mlx5_debugfs_root) + return; + + if (eq->dbg) + rem_res_tree(eq->dbg); +} + +int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.dbg.cq_debugfs, + &cq->dbg, cq->cqn, cq_fields, + ARRAY_SIZE(cq_fields), cq); + if (err) + cq->dbg = NULL; + + return err; +} + +void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + if (!mlx5_debugfs_root) + return; + + if (cq->dbg) { + rem_res_tree(cq->dbg); + cq->dbg = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c new file mode 100644 index 0000000000..7909f378dc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -0,0 +1,632 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" +#include "devlink.h" +#include "lag/lag.h" + +/* intf dev list mutex */ +static DEFINE_MUTEX(mlx5_intf_mutex); +static DEFINE_IDA(mlx5_adev_ida); + +static bool is_eth_rep_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_ESWITCH)) + return false; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return false; + + if (!is_mdev_switchdev_mode(dev)) + return false; + + return true; +} + +bool mlx5_eth_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) + return false; + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + if (!MLX5_CAP_GEN(dev, eth_net_offloads)) { + mlx5_core_warn(dev, "Missing eth_net_offloads capability\n"); + return false; + } + + if (!MLX5_CAP_GEN(dev, nic_flow_table)) { + mlx5_core_warn(dev, "Missing nic_flow_table capability\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, csum_cap)) { + mlx5_core_warn(dev, "Missing csum_cap capability\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, max_lso_cap)) { + mlx5_core_warn(dev, "Missing max_lso_cap capability\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, vlan_cap)) { + mlx5_core_warn(dev, "Missing vlan_cap capability\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, rss_ind_tbl_cap)) { + mlx5_core_warn(dev, "Missing rss_ind_tbl_cap capability\n"); + return false; + } + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.max_ft_level) < 3) { + mlx5_core_warn(dev, "max_ft_level < 3\n"); + return false; + } + + if (!MLX5_CAP_ETH(dev, self_lb_en_modifiable)) + mlx5_core_warn(dev, "Self loop back prevention is not supported\n"); + if (!MLX5_CAP_GEN(dev, cq_moderation)) + mlx5_core_warn(dev, "CQ moderation is not supported\n"); + + return true; +} + +bool mlx5_vnet_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET)) + return false; + + if (mlx5_core_is_pf(dev)) + return false; + + if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q)) + return false; + + if (!(MLX5_CAP_DEV_VDPA_EMULATION(dev, event_mode) & + MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE)) + return false; + + if (!MLX5_CAP_DEV_VDPA_EMULATION(dev, eth_frame_offload_type)) + return false; + + return true; +} + +static bool is_vnet_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + &val); + return err ? false : val.vbool; +} + +static bool is_ib_rep_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return false; + + if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) + return false; + + if (!is_eth_rep_supported(dev)) + return false; + + if (mlx5_core_mp_enabled(dev)) + return false; + + return true; +} + +static bool is_mp_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return false; + + if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) + return false; + + if (is_ib_rep_supported(dev)) + return false; + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + if (!mlx5_core_is_mp_slave(dev)) + return false; + + return true; +} + +bool mlx5_rdma_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return false; + + if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) + return false; + + if (is_ib_rep_supported(dev)) + return false; + + if (is_mp_supported(dev)) + return false; + + return true; +} + +static bool is_ib_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + &val); + return err ? false : val.vbool; +} + +enum { + MLX5_INTERFACE_PROTOCOL_ETH, + MLX5_INTERFACE_PROTOCOL_ETH_REP, + + MLX5_INTERFACE_PROTOCOL_IB, + MLX5_INTERFACE_PROTOCOL_IB_REP, + MLX5_INTERFACE_PROTOCOL_MPIB, + + MLX5_INTERFACE_PROTOCOL_VNET, +}; + +static const struct mlx5_adev_device { + const char *suffix; + bool (*is_supported)(struct mlx5_core_dev *dev); + bool (*is_enabled)(struct mlx5_core_dev *dev); +} mlx5_adev_devices[] = { + [MLX5_INTERFACE_PROTOCOL_VNET] = { .suffix = "vnet", + .is_supported = &mlx5_vnet_supported, + .is_enabled = &is_vnet_enabled }, + [MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma", + .is_supported = &mlx5_rdma_supported, + .is_enabled = &is_ib_enabled }, + [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth", + .is_supported = &mlx5_eth_supported, + .is_enabled = &mlx5_core_is_eth_enabled }, + [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep", + .is_supported = &is_eth_rep_supported }, + [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep", + .is_supported = &is_ib_rep_supported }, + [MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport", + .is_supported = &is_mp_supported }, +}; + +int mlx5_adev_idx_alloc(void) +{ + return ida_alloc(&mlx5_adev_ida, GFP_KERNEL); +} + +void mlx5_adev_idx_free(int idx) +{ + ida_free(&mlx5_adev_ida, idx); +} + +int mlx5_adev_init(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + priv->adev = kcalloc(ARRAY_SIZE(mlx5_adev_devices), + sizeof(struct mlx5_adev *), GFP_KERNEL); + if (!priv->adev) + return -ENOMEM; + + return 0; +} + +void mlx5_adev_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + kfree(priv->adev); +} + +static void adev_release(struct device *dev) +{ + struct mlx5_adev *mlx5_adev = + container_of(dev, struct mlx5_adev, adev.dev); + struct mlx5_priv *priv = &mlx5_adev->mdev->priv; + int idx = mlx5_adev->idx; + + kfree(mlx5_adev); + priv->adev[idx] = NULL; +} + +static struct mlx5_adev *add_adev(struct mlx5_core_dev *dev, int idx) +{ + const char *suffix = mlx5_adev_devices[idx].suffix; + struct auxiliary_device *adev; + struct mlx5_adev *madev; + int ret; + + madev = kzalloc(sizeof(*madev), GFP_KERNEL); + if (!madev) + return ERR_PTR(-ENOMEM); + + adev = &madev->adev; + adev->id = dev->priv.adev_idx; + adev->name = suffix; + adev->dev.parent = dev->device; + adev->dev.release = adev_release; + madev->mdev = dev; + madev->idx = idx; + + ret = auxiliary_device_init(adev); + if (ret) { + kfree(madev); + return ERR_PTR(ret); + } + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ERR_PTR(ret); + } + return madev; +} + +static void del_adev(struct auxiliary_device *adev) +{ + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev) +{ + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev) +{ + return dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; +} + +int mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct auxiliary_device *adev; + struct auxiliary_driver *adrv; + int ret = 0, i; + + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&mlx5_intf_mutex); + priv->flags &= ~MLX5_PRIV_FLAGS_DETACH; + for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) { + if (!priv->adev[i]) { + bool is_supported = false; + + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + continue; + } + + if (mlx5_adev_devices[i].is_supported) + is_supported = mlx5_adev_devices[i].is_supported(dev); + + if (!is_supported) + continue; + + priv->adev[i] = add_adev(dev, i); + if (IS_ERR(priv->adev[i])) { + ret = PTR_ERR(priv->adev[i]); + priv->adev[i] = NULL; + } + } else { + adev = &priv->adev[i]->adev; + + /* Pay attention that this is not PCI driver that + * mlx5_core_dev is connected, but auxiliary driver. + */ + if (!adev->dev.driver) + continue; + adrv = to_auxiliary_drv(adev->dev.driver); + + if (adrv->resume) + ret = adrv->resume(adev); + } + if (ret) { + mlx5_core_warn(dev, "Device[%d] (%s) failed to load\n", + i, mlx5_adev_devices[i].suffix); + + break; + } + } + mutex_unlock(&mlx5_intf_mutex); + return ret; +} + +void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend) +{ + struct mlx5_priv *priv = &dev->priv; + struct auxiliary_device *adev; + struct auxiliary_driver *adrv; + pm_message_t pm = {}; + int i; + + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&mlx5_intf_mutex); + for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) { + if (!priv->adev[i]) + continue; + + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + goto skip_suspend; + } + + adev = &priv->adev[i]->adev; + /* Auxiliary driver was unbind manually through sysfs */ + if (!adev->dev.driver) + goto skip_suspend; + + adrv = to_auxiliary_drv(adev->dev.driver); + + if (adrv->suspend && suspend) { + adrv->suspend(adev, pm); + continue; + } + +skip_suspend: + del_adev(&priv->adev[i]->adev); + priv->adev[i] = NULL; + } + priv->flags |= MLX5_PRIV_FLAGS_DETACH; + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_register_device(struct mlx5_core_dev *dev) +{ + int ret; + + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + ret = mlx5_rescan_drivers_locked(dev); + mutex_unlock(&mlx5_intf_mutex); + if (ret) + mlx5_unregister_device(dev); + + return ret; +} + +void mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + mlx5_rescan_drivers_locked(dev); + mutex_unlock(&mlx5_intf_mutex); +} + +static int add_drivers(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + int i, ret = 0; + + for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) { + bool is_supported = false; + + if (priv->adev[i]) + continue; + + if (mlx5_adev_devices[i].is_enabled && + !(mlx5_adev_devices[i].is_enabled(dev))) + continue; + + if (mlx5_adev_devices[i].is_supported) + is_supported = mlx5_adev_devices[i].is_supported(dev); + + if (!is_supported) + continue; + + priv->adev[i] = add_adev(dev, i); + if (IS_ERR(priv->adev[i])) { + mlx5_core_warn(dev, "Device[%d] (%s) failed to load\n", + i, mlx5_adev_devices[i].suffix); + /* We continue to rescan drivers and leave to the caller + * to make decision if to release everything or continue. + */ + ret = PTR_ERR(priv->adev[i]); + priv->adev[i] = NULL; + } + } + return ret; +} + +static void delete_drivers(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + bool delete_all; + int i; + + delete_all = priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + + for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) { + bool is_supported = false; + + if (!priv->adev[i]) + continue; + + if (mlx5_adev_devices[i].is_enabled) { + bool enabled; + + enabled = mlx5_adev_devices[i].is_enabled(dev); + if (!enabled) + goto del_adev; + } + + if (mlx5_adev_devices[i].is_supported && !delete_all) + is_supported = mlx5_adev_devices[i].is_supported(dev); + + if (is_supported) + continue; + +del_adev: + del_adev(&priv->adev[i]->adev); + priv->adev[i] = NULL; + } +} + +/* This function is used after mlx5_core_dev is reconfigured. + */ +int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + lockdep_assert_held(&mlx5_intf_mutex); + if (priv->flags & MLX5_PRIV_FLAGS_DETACH) + return 0; + + delete_drivers(dev); + if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + return 0; + + return add_drivers(dev); +} + +bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) +{ + u64 fsystem_guid, psystem_guid; + + fsystem_guid = mlx5_query_nic_system_image_guid(dev); + psystem_guid = mlx5_query_nic_system_image_guid(peer_dev); + + return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid); +} + +static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev) +{ + return (u32)((pci_domain_nr(dev->pdev->bus) << 16) | + (dev->pdev->bus->number << 8) | + PCI_SLOT(dev->pdev->devfn)); +} + +static int _next_phys_dev(struct mlx5_core_dev *mdev, + const struct mlx5_core_dev *curr) +{ + if (!mlx5_core_is_pf(mdev)) + return 0; + + if (mdev == curr) + return 0; + + if (!mlx5_same_hw_devs(mdev, (struct mlx5_core_dev *)curr) && + mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr)) + return 0; + + return 1; +} + +static void *pci_get_other_drvdata(struct device *this, struct device *other) +{ + if (this->driver != other->driver) + return NULL; + + return pci_get_drvdata(to_pci_dev(other)); +} + +static int next_phys_dev_lag(struct device *dev, const void *data) +{ + struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data; + + mdev = pci_get_other_drvdata(this->device, dev); + if (!mdev) + return 0; + + if (!mlx5_lag_is_supported(mdev)) + return 0; + + return _next_phys_dev(mdev, data); +} + +static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev, + int (*match)(struct device *dev, const void *data)) +{ + struct device *next; + + if (!mlx5_core_is_pf(dev)) + return NULL; + + next = bus_find_device(&pci_bus_type, NULL, dev, match); + if (!next) + return NULL; + + put_device(next); + return pci_get_drvdata(to_pci_dev(next)); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev) +{ + lockdep_assert_held(&mlx5_intf_mutex); + return mlx5_get_next_dev(dev, &next_phys_dev_lag); +} + +void mlx5_dev_list_lock(void) +{ + mutex_lock(&mlx5_intf_mutex); +} +void mlx5_dev_list_unlock(void) +{ + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_dev_list_trylock(void) +{ + return mutex_trylock(&mlx5_intf_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c new file mode 100644 index 0000000000..af8460bb25 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -0,0 +1,860 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include + +#include "mlx5_core.h" +#include "fw_reset.h" +#include "fs_core.h" +#include "eswitch.h" +#include "esw/qos.h" +#include "sf/dev/dev.h" +#include "sf/sf.h" + +static int mlx5_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + return mlx5_firmware_flash(dev, params->fw, extack); +} + +static u8 mlx5_fw_ver_major(u32 version) +{ + return (version >> 24) & 0xff; +} + +static u8 mlx5_fw_ver_minor(u32 version) +{ + return (version >> 16) & 0xff; +} + +static u16 mlx5_fw_ver_subminor(u32 version) +{ + return version & 0xffff; +} + +#define DEVLINK_FW_STRING_LEN 32 + +static int +mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char version_str[DEVLINK_FW_STRING_LEN]; + u32 running_fw, stored_fw; + int err; + + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); + if (err) + return err; + + err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); + if (err) + return err; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw), + mlx5_fw_ver_subminor(running_fw)); + err = devlink_info_version_running_put(req, "fw.version", version_str); + if (err) + return err; + err = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); + if (err) + return err; + + /* no pending version, return running (stored) version */ + if (stored_fw == 0) + stored_fw = running_fw; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw), + mlx5_fw_ver_subminor(stored_fw)); + err = devlink_info_version_stored_put(req, "fw.version", version_str); + if (err) + return err; + return devlink_info_version_stored_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + version_str); +} + +static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 reset_level, reset_type, net_port_alive; + int err; + + err = mlx5_fw_reset_query(dev, &reset_level, &reset_type); + if (err) + return err; + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) { + NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot"); + return -EINVAL; + } + + net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); + err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack); + if (err) + return err; + + err = mlx5_fw_reset_wait_reset_done(dev); + if (err) + return err; + + mlx5_unload_one_devl_locked(dev, true); + err = mlx5_health_wait_pci_up(dev); + if (err) + NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); + + return err; +} + +static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 reset_level; + int err; + + err = mlx5_fw_reset_query(dev, &reset_level, NULL); + if (err) + return err; + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL0)) { + NL_SET_ERR_MSG_MOD(extack, + "FW upgrade to the stored FW can't be done by FW live patching"); + return -EINVAL; + } + + return mlx5_fw_reset_set_live_patch(dev); +} + +static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct pci_dev *pdev = dev->pdev; + bool sf_dev_allocated; + int ret = 0; + + if (mlx5_dev_is_lightweight(dev)) { + if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + return -EOPNOTSUPP; + mlx5_unload_one_light(dev); + return 0; + } + + sf_dev_allocated = mlx5_sf_dev_allocated(dev); + if (sf_dev_allocated) { + /* Reload results in deleting SF device which further results in + * unregistering devlink instance while holding devlink_mutext. + * Hence, do not support reload. + */ + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated"); + return -EOPNOTSUPP; + } + + if (mlx5_lag_is_active(dev)) { + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode"); + return -EOPNOTSUPP; + } + + if (mlx5_core_is_mp_slave(dev)) { + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported for multi port slave"); + return -EOPNOTSUPP; + } + + if (mlx5_core_is_pf(dev) && pci_num_vf(pdev)) + NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); + + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + mlx5_unload_one_devl_locked(dev, false); + break; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + ret = mlx5_devlink_trigger_fw_live_patch(devlink, extack); + else + ret = mlx5_devlink_reload_fw_activate(devlink, extack); + break; + default: + /* Unsupported action should not get to this function */ + WARN_ON(1); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, + enum devlink_reload_limit limit, u32 *actions_performed, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + int ret = 0; + + *actions_performed = BIT(action); + switch (action) { + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + if (mlx5_dev_is_lightweight(dev)) { + mlx5_fw_reporters_create(dev); + return mlx5_init_one_devl_locked(dev); + } + ret = mlx5_load_one_devl_locked(dev, false); + break; + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: + if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) + break; + /* On fw_activate action, also driver is reloaded and reinit performed */ + *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); + ret = mlx5_load_one_devl_locked(dev, true); + if (ret) + return ret; + ret = mlx5_fw_reset_verify_fw_complete(dev, extack); + break; + default: + /* Unsupported action should not get to this function */ + WARN_ON(1); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id) +{ + struct mlx5_devlink_trap *dl_trap; + + list_for_each_entry(dl_trap, &dev->priv.traps, list) + if (dl_trap->trap.id == trap_id) + return dl_trap; + + return NULL; +} + +static int mlx5_devlink_trap_init(struct devlink *devlink, const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = kzalloc(sizeof(*dl_trap), GFP_KERNEL); + if (!dl_trap) + return -ENOMEM; + + dl_trap->trap.id = trap->id; + dl_trap->trap.action = DEVLINK_TRAP_ACTION_DROP; + dl_trap->item = trap_ctx; + + if (mlx5_find_trap_by_id(dev, trap->id)) { + kfree(dl_trap); + mlx5_core_err(dev, "Devlink trap: Trap 0x%x already found", trap->id); + return -EEXIST; + } + + list_add_tail(&dl_trap->list, &dev->priv.traps); + return 0; +} + +static void mlx5_devlink_trap_fini(struct devlink *devlink, const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap->id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Missing trap id 0x%x", trap->id); + return; + } + list_del(&dl_trap->list); + kfree(dl_trap); +} + +static int mlx5_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap_event_ctx trap_event_ctx; + enum devlink_trap_action action_orig; + struct mlx5_devlink_trap *dl_trap; + int err; + + if (is_mdev_switchdev_mode(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode"); + return -EOPNOTSUPP; + } + + dl_trap = mlx5_find_trap_by_id(dev, trap->id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id); + return -EINVAL; + } + + if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) + return -EOPNOTSUPP; + + if (action == dl_trap->trap.action) + return 0; + + action_orig = dl_trap->trap.action; + dl_trap->trap.action = action; + trap_event_ctx.trap = &dl_trap->trap; + trap_event_ctx.err = 0; + err = mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_TYPE_TRAP, + &trap_event_ctx); + if (err == NOTIFY_BAD) + dl_trap->trap.action = action_orig; + + return trap_event_ctx.err; +} + +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_ESWITCH + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, + .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, + .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, + .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set, + .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set, + .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set, + .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, + .rate_node_new = mlx5_esw_devlink_rate_node_new, + .rate_node_del = mlx5_esw_devlink_rate_node_del, + .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, +#endif +#ifdef CONFIG_MLX5_SF_MANAGER + .port_new = mlx5_devlink_sf_port_new, +#endif + .flash_update = mlx5_devlink_flash_update, + .info_get = mlx5_devlink_info_get, + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), + .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET), + .reload_down = mlx5_devlink_reload_down, + .reload_up = mlx5_devlink_reload_up, + .trap_init = mlx5_devlink_trap_init, + .trap_fini = mlx5_devlink_trap_fini, + .trap_action_set = mlx5_devlink_trap_action_set, +}; + +void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, + struct devlink_port *dl_port) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap_id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Report on invalid trap id 0x%x", trap_id); + return; + } + + if (dl_trap->trap.action != DEVLINK_TRAP_ACTION_TRAP) { + mlx5_core_dbg(dev, "Devlink trap: Trap id %d has action %d", trap_id, + dl_trap->trap.action); + return; + } + devlink_trap_report(devlink, skb, dl_trap->item, dl_port, NULL); +} + +int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev) +{ + struct mlx5_devlink_trap *dl_trap; + int count = 0; + + list_for_each_entry(dl_trap, &dev->priv.traps, list) + if (dl_trap->trap.action == DEVLINK_TRAP_ACTION_TRAP) + count++; + + return count; +} + +int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, + enum devlink_trap_action *action) +{ + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap_id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Get action on invalid trap id 0x%x", + trap_id); + return -EINVAL; + } + + *action = dl_trap->trap.action; + return 0; +} + +struct devlink *mlx5_devlink_alloc(struct device *dev) +{ + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev), + dev); +} + +void mlx5_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !MLX5_CAP_GEN(dev, roce) && + !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); + return -EOPNOTSUPP; + } + if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE"); + return -EOPNOTSUPP; + } + + return 0; +} + +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + int group_num = val.vu32; + + if (group_num < 1 || group_num > 1024) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported group number, supported range is 1-1024"); + return -EOPNOTSUPP; + } + + return 0; +} +#endif + +static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; +} + +static int +mlx5_devlink_hairpin_num_queues_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + return val.vu32 ? 0 : -EINVAL; +} + +static int +mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 val32 = val.vu32; + + if (!is_power_of_2(val32)) { + NL_SET_ERR_MSG_MOD(extack, "Value is not power of two"); + return -EINVAL; + } + + if (val32 > BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))) { + NL_SET_ERR_MSG_FMT_MOD( + extack, "Maximum hairpin queue size is %lu", + BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))); + return -EINVAL; + } + + return 0; +} + +static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + u32 link_speed = 0; + u64 link_speed64; + + /* set hairpin pair per each 50Gbs share of the link */ + mlx5_port_max_linkspeed(dev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + + value.vu32 = link_speed64; + devl_param_driverinit_value_set( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, value); + + value.vu32 = + BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(dev), + MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))); + devl_param_driverinit_value_set( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, value); +} + +static const struct devlink_param mlx5_devlink_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_enable_roce_validate), +#ifdef CONFIG_MLX5_ESWITCH + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + "fdb_large_groups", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, + mlx5_devlink_large_group_num_validate), +#endif + DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), + DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), +}; + +static void mlx5_devlink_set_params_init_values(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + + value.vbool = MLX5_CAP_GEN(dev, roce) && !mlx5_dev_is_lightweight(dev); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + value); + +#ifdef CONFIG_MLX5_ESWITCH + value.vu32 = ESW_OFFLOADS_DEFAULT_NUM_GROUPS; + devl_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + value); +#endif + + value.vu32 = MLX5_COMP_EQ_SIZE; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + value); + + value.vu32 = MLX5_NUM_ASYNC_EQE; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + value); +} + +static const struct devlink_param mlx5_devlink_eth_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, + "hairpin_num_queues", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_hairpin_num_queues_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, + "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_hairpin_queue_size_validate), +}; + +static int mlx5_devlink_eth_params_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!mlx5_eth_supported(dev)) + return 0; + + err = devl_params_register(devlink, mlx5_devlink_eth_params, + ARRAY_SIZE(mlx5_devlink_eth_params)); + if (err) + return err; + + value.vbool = !mlx5_dev_is_lightweight(dev); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + value); + + mlx5_devlink_hairpin_params_init_values(devlink); + + return 0; +} + +static void mlx5_devlink_eth_params_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!mlx5_eth_supported(dev)) + return; + + devl_params_unregister(devlink, mlx5_devlink_eth_params, + ARRAY_SIZE(mlx5_devlink_eth_params)); +} + +static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !mlx5_rdma_supported(dev)) + return -EOPNOTSUPP; + return 0; +} + +static const struct devlink_param mlx5_devlink_rdma_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_enable_rdma_validate), +}; + +static int mlx5_devlink_rdma_params_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return 0; + + err = devl_params_register(devlink, mlx5_devlink_rdma_params, + ARRAY_SIZE(mlx5_devlink_rdma_params)); + if (err) + return err; + + value.vbool = !mlx5_dev_is_lightweight(dev); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + value); + return 0; +} + +static void mlx5_devlink_rdma_params_unregister(struct devlink *devlink) +{ + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) + return; + + devl_params_unregister(devlink, mlx5_devlink_rdma_params, + ARRAY_SIZE(mlx5_devlink_rdma_params)); +} + +static const struct devlink_param mlx5_devlink_vnet_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_VNET, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), +}; + +static int mlx5_devlink_vnet_params_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!mlx5_vnet_supported(dev)) + return 0; + + err = devl_params_register(devlink, mlx5_devlink_vnet_params, + ARRAY_SIZE(mlx5_devlink_vnet_params)); + if (err) + return err; + + value.vbool = !mlx5_dev_is_lightweight(dev); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + value); + return 0; +} + +static void mlx5_devlink_vnet_params_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!mlx5_vnet_supported(dev)) + return; + + devl_params_unregister(devlink, mlx5_devlink_vnet_params, + ARRAY_SIZE(mlx5_devlink_vnet_params)); +} + +static int mlx5_devlink_auxdev_params_register(struct devlink *devlink) +{ + int err; + + err = mlx5_devlink_eth_params_register(devlink); + if (err) + return err; + + err = mlx5_devlink_rdma_params_register(devlink); + if (err) + goto rdma_err; + + err = mlx5_devlink_vnet_params_register(devlink); + if (err) + goto vnet_err; + return 0; + +vnet_err: + mlx5_devlink_rdma_params_unregister(devlink); +rdma_err: + mlx5_devlink_eth_params_unregister(devlink); + return err; +} + +static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink) +{ + mlx5_devlink_vnet_params_unregister(devlink); + mlx5_devlink_rdma_params_unregister(devlink); + mlx5_devlink_eth_params_unregister(devlink); +} + +static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (val.vu32 == 0) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0"); + return -EINVAL; + } + + if (!is_power_of_2(val.vu32)) { + NL_SET_ERR_MSG_MOD(extack, "Only power of 2 values are supported for max_macs"); + return -EINVAL; + } + + if (ilog2(val.vu32) > + MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param mlx5_devlink_max_uc_list_params[] = { + DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_max_uc_list_validate), +}; + +static int mlx5_devlink_max_uc_list_params_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return 0; + + err = devl_params_register(devlink, mlx5_devlink_max_uc_list_params, + ARRAY_SIZE(mlx5_devlink_max_uc_list_params)); + if (err) + return err; + + value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + return 0; +} + +static void +mlx5_devlink_max_uc_list_params_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return; + + devl_params_unregister(devlink, mlx5_devlink_max_uc_list_params, + ARRAY_SIZE(mlx5_devlink_max_uc_list_params)); +} + +#define MLX5_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) + +static const struct devlink_trap mlx5_traps_arr[] = { + MLX5_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + MLX5_TRAP_DROP(DMAC_FILTER, L2_DROPS), +}; + +static const struct devlink_trap_group mlx5_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0), +}; + +int mlx5_devlink_traps_register(struct devlink *devlink) +{ + struct mlx5_core_dev *core_dev = devlink_priv(devlink); + int err; + + err = devl_trap_groups_register(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); + if (err) + return err; + + err = devl_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr), + &core_dev->priv); + if (err) + goto err_trap_group; + return 0; + +err_trap_group: + devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); + return err; +} + +void mlx5_devlink_traps_unregister(struct devlink *devlink) +{ + devl_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr)); + devl_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); +} + +int mlx5_devlink_params_register(struct devlink *devlink) +{ + int err; + + /* Here only the driver init params should be registered. + * Runtime params should be registered by the code which + * behaviour they configure. + */ + + err = devl_params_register(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); + if (err) + return err; + + mlx5_devlink_set_params_init_values(devlink); + + err = mlx5_devlink_auxdev_params_register(devlink); + if (err) + goto auxdev_reg_err; + + err = mlx5_devlink_max_uc_list_params_register(devlink); + if (err) + goto max_uc_list_err; + + return 0; + +max_uc_list_err: + mlx5_devlink_auxdev_params_unregister(devlink); +auxdev_reg_err: + devl_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); + return err; +} + +void mlx5_devlink_params_unregister(struct devlink *devlink) +{ + mlx5_devlink_max_uc_list_params_unregister(devlink); + mlx5_devlink_auxdev_params_unregister(devlink); + devl_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h new file mode 100644 index 0000000000..961f75da62 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef __MLX5_DEVLINK_H__ +#define __MLX5_DEVLINK_H__ + +#include + +enum mlx5_devlink_resource_id { + MLX5_DL_RES_MAX_LOCAL_SFS = 1, + MLX5_DL_RES_MAX_EXTERNAL_SFS, + + __MLX5_ID_RES_MAX, + MLX5_ID_RES_MAX = __MLX5_ID_RES_MAX - 1, +}; + +enum mlx5_devlink_param_id { + MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, + MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, +}; + +struct mlx5_trap_ctx { + int id; + int action; +}; + +struct mlx5_devlink_trap { + struct mlx5_trap_ctx trap; + void *item; + struct list_head list; +}; + +struct mlx5_devlink_trap_event_ctx { + struct mlx5_trap_ctx *trap; + int err; +}; + +struct mlx5_core_dev; +void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, + struct devlink_port *dl_port); +int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev); +int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, + enum devlink_trap_action *action); +int mlx5_devlink_traps_register(struct devlink *devlink); +void mlx5_devlink_traps_unregister(struct devlink *devlink); + +struct devlink *mlx5_devlink_alloc(struct device *dev); +void mlx5_devlink_free(struct devlink *devlink); +int mlx5_devlink_params_register(struct devlink *devlink); +void mlx5_devlink_params_unregister(struct devlink *devlink); + +static inline bool mlx5_core_is_eth_enabled(struct mlx5_core_dev *dev) +{ + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + &val); + return err ? false : val.vbool; +} + +#endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h new file mode 100644 index 0000000000..406ebe1740 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_CMD_TP_H_ + +#include +#include + +TRACE_EVENT(mlx5_cmd, + TP_PROTO(const char *command_str, u16 opcode, u16 op_mod, + const char *status_str, u8 status, u32 syndrome, int err), + TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err), + TP_STRUCT__entry(__string(command_str, command_str) + __field(u16, opcode) + __field(u16, op_mod) + __string(status_str, status_str) + __field(u8, status) + __field(u32, syndrome) + __field(int, err) + ), + TP_fast_assign(__assign_str(command_str, command_str); + __entry->opcode = opcode; + __entry->op_mod = op_mod; + __assign_str(status_str, status_str); + __entry->status = status; + __entry->syndrome = syndrome; + __entry->err = err; + ), + TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)", + __get_str(command_str), __entry->opcode, __entry->op_mod, + __get_str(status_str), __entry->status, __entry->syndrome, + __entry->err) +); + +#endif /* _MLX5_CMD_TP_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cmd_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c new file mode 100644 index 0000000000..28d02749d3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include +#include "mlx5_core.h" +#include "lib/pci_vsc.h" +#include "lib/mlx5.h" + +#define BAD_ACCESS 0xBADACCE5 +#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7 + +static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev) +{ + return !!dev->priv.health.crdump_size; +} + +static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data) +{ + u32 crdump_size = dev->priv.health.crdump_size; + int i, ret; + + for (i = 0; i < (crdump_size / 4); i++) + cr_data[i] = BAD_ACCESS; + + ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size); + if (ret <= 0) { + if (ret == 0) + return -EIO; + return ret; + } + + if (crdump_size != ret) { + mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n", + ret, crdump_size); + return -EINVAL; + } + + return 0; +} + +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) +{ + int ret; + + if (!mlx5_crdump_enabled(dev)) + return -ENODEV; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) { + mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n", + ret); + return ret; + } + /* Verify no other PF is running cr-dump or sw reset */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, + MLX5_VSC_LOCK); + if (ret) { + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + goto unlock_gw; + } + + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL); + if (ret) + goto unlock_sem; + + ret = mlx5_crdump_fill(dev, cr_data); + +unlock_sem: + mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK); +unlock_gw: + mlx5_vsc_gw_unlock(dev); + return ret; +} + +int mlx5_crdump_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + u32 space_size; + int ret; + + if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) || + mlx5_crdump_enabled(dev)) + return 0; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) + return ret; + + /* Check if space is supported and get space size */ + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, + &space_size); + if (ret) { + /* Unlock and mask error since space is not supported */ + mlx5_vsc_gw_unlock(dev); + return 0; + } + + if (!space_size) { + mlx5_core_warn(dev, "Invalid Crspace size, zero\n"); + mlx5_vsc_gw_unlock(dev); + return -EINVAL; + } + + ret = mlx5_vsc_gw_unlock(dev); + if (ret) + return ret; + + priv->health.crdump_size = space_size; + return 0; +} + +void mlx5_crdump_disable(struct mlx5_core_dev *dev) +{ + dev->priv.health.crdump_size = 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h new file mode 100644 index 0000000000..f15718db5d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_EN_REP_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_EN_REP_TP_ + +#include +#include +#include "en_rep.h" + +TRACE_EVENT(mlx5e_rep_neigh_update, + TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha, + bool neigh_connected), + TP_ARGS(nhe, ha, neigh_connected), + TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name) + __array(u8, ha, ETH_ALEN) + __array(u8, v4, 4) + __array(u8, v6, 16) + __field(bool, neigh_connected) + ), + TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh; + struct in6_addr *pin6; + __be32 *p32; + + __assign_str(devname, nhe->neigh_dev->name); + __entry->neigh_connected = neigh_connected; + memcpy(__entry->ha, ha, ETH_ALEN); + + p32 = (__be32 *)__entry->v4; + pin6 = (struct in6_addr *)__entry->v6; + if (mn->family == AF_INET) { + *p32 = mn->dst_ip.v4; + ipv6_addr_set_v4mapped(*p32, pin6); + } else if (mn->family == AF_INET6) { + *pin6 = mn->dst_ip.v6; + } + ), + TP_printk("netdev: %s MAC: %pM IPv4: %pI4 IPv6: %pI6c neigh_connected=%d\n", + __get_str(devname), __entry->ha, + __entry->v4, __entry->v6, __entry->neigh_connected + ) +); + +#endif /* _MLX5_EN_REP_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE en_rep_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c new file mode 100644 index 0000000000..c5dc6c50fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#define CREATE_TRACE_POINTS +#include "en_tc_tracepoint.h" + +void put_ids_to_array(int *ids, + const struct flow_action_entry *entries, + unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) + ids[i] = entries[i].id; +} + +#define NAME_SIZE 16 + +static const char FLOWACT2STR[NUM_FLOW_ACTIONS][NAME_SIZE] = { + [FLOW_ACTION_ACCEPT] = "ACCEPT", + [FLOW_ACTION_DROP] = "DROP", + [FLOW_ACTION_TRAP] = "TRAP", + [FLOW_ACTION_GOTO] = "GOTO", + [FLOW_ACTION_REDIRECT] = "REDIRECT", + [FLOW_ACTION_MIRRED] = "MIRRED", + [FLOW_ACTION_VLAN_PUSH] = "VLAN_PUSH", + [FLOW_ACTION_VLAN_POP] = "VLAN_POP", + [FLOW_ACTION_VLAN_MANGLE] = "VLAN_MANGLE", + [FLOW_ACTION_TUNNEL_ENCAP] = "TUNNEL_ENCAP", + [FLOW_ACTION_TUNNEL_DECAP] = "TUNNEL_DECAP", + [FLOW_ACTION_MANGLE] = "MANGLE", + [FLOW_ACTION_ADD] = "ADD", + [FLOW_ACTION_CSUM] = "CSUM", + [FLOW_ACTION_MARK] = "MARK", + [FLOW_ACTION_WAKE] = "WAKE", + [FLOW_ACTION_QUEUE] = "QUEUE", + [FLOW_ACTION_SAMPLE] = "SAMPLE", + [FLOW_ACTION_POLICE] = "POLICE", + [FLOW_ACTION_CT] = "CT", +}; + +const char *parse_action(struct trace_seq *p, + int *ids, + unsigned int num) +{ + const char *ret = trace_seq_buffer_ptr(p); + unsigned int i; + + for (i = 0; i < num; i++) { + if (ids[i] < NUM_FLOW_ACTIONS) + trace_seq_printf(p, "%s ", FLOWACT2STR[ids[i]]); + else + trace_seq_printf(p, "UNKNOWN "); + } + + trace_seq_putc(p, 0); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h new file mode 100644 index 0000000000..ac52ef37f3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_TC_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_TC_TP_ + +#include +#include +#include +#include "en_rep.h" + +#define __parse_action(ids, num) parse_action(p, ids, num) + +void put_ids_to_array(int *ids, + const struct flow_action_entry *entries, + unsigned int num); + +const char *parse_action(struct trace_seq *p, + int *ids, + unsigned int num); + +DECLARE_EVENT_CLASS(mlx5e_flower_template, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f), + TP_STRUCT__entry(__field(void *, cookie) + __field(unsigned int, num) + __dynamic_array(int, ids, f->rule ? + f->rule->action.num_entries : 0) + ), + TP_fast_assign(__entry->cookie = (void *)f->cookie; + __entry->num = (f->rule ? + f->rule->action.num_entries : 0); + if (__entry->num) + put_ids_to_array(__get_dynamic_array(ids), + f->rule->action.entries, + f->rule->action.num_entries); + ), + TP_printk("cookie=%p actions= %s\n", + __entry->cookie, __entry->num ? + __parse_action(__get_dynamic_array(ids), + __entry->num) : "NULL" + ) +); + +DEFINE_EVENT(mlx5e_flower_template, mlx5e_configure_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f) + ); + +DEFINE_EVENT(mlx5e_flower_template, mlx5e_delete_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f) + ); + +TRACE_EVENT(mlx5e_stats_flower, + TP_PROTO(const struct flow_cls_offload *f), + TP_ARGS(f), + TP_STRUCT__entry(__field(void *, cookie) + __field(u64, bytes) + __field(u64, packets) + __field(u64, lastused) + ), + TP_fast_assign(__entry->cookie = (void *)f->cookie; + __entry->bytes = f->stats.bytes; + __entry->packets = f->stats.pkts; + __entry->lastused = f->stats.lastused; + ), + TP_printk("cookie=%p bytes=%llu packets=%llu lastused=%llu\n", + __entry->cookie, __entry->bytes, + __entry->packets, __entry->lastused + ) +); + +TRACE_EVENT(mlx5e_tc_update_neigh_used_value, + TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used), + TP_ARGS(nhe, neigh_used), + TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name) + __array(u8, v4, 4) + __array(u8, v6, 16) + __field(bool, neigh_used) + ), + TP_fast_assign(const struct mlx5e_neigh *mn = &nhe->m_neigh; + struct in6_addr *pin6; + __be32 *p32; + + __assign_str(devname, nhe->neigh_dev->name); + __entry->neigh_used = neigh_used; + + p32 = (__be32 *)__entry->v4; + pin6 = (struct in6_addr *)__entry->v6; + if (mn->family == AF_INET) { + *p32 = mn->dst_ip.v4; + ipv6_addr_set_v4mapped(*p32, pin6); + } else if (mn->family == AF_INET6) { + *pin6 = mn->dst_ip.v6; + } + ), + TP_printk("netdev: %s IPv4: %pI4 IPv6: %pI6c neigh_used=%d\n", + __get_str(devname), __entry->v4, __entry->v6, + __entry->neigh_used + ) +); + +#endif /* _MLX5_TC_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE en_tc_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c new file mode 100644 index 0000000000..6d73127b72 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define CREATE_TRACE_POINTS + +#include "fs_tracepoint.h" +#include + +#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name +#define MASK_VAL(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET(spec, mask, fld),\ + .v = MLX5_GET(spec, val, fld)} +#define MASK_VAL_BE(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET_BE(type, spec, mask, fld),\ + .v = MLX5_GET_BE(type, spec, val, fld)} +#define GET_MASKED_VAL(name) (name.m & name.v) + +#define GET_MASK_VAL(name, type, mask, val, fld) \ + (name.m = MLX5_GET(type, mask, fld), \ + name.v = MLX5_GET(type, val, fld), \ + name.m & name.v) +#define PRINT_MASKED_VAL(name, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \ + } +#define PRINT_MASKED_VALP(name, cast, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", \ + (cast)&name.v);\ + } + +static void print_lyr_2_4_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_L2(type, name, fld) \ + MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld) + DECLARE_MASK_VAL(u64, smac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)}; + DECLARE_MASK_VAL(u64, dmac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)}; + MASK_VAL_L2(u16, ethertype, ethertype); + MASK_VAL_L2(u8, ip_version, ip_version); + + PRINT_MASKED_VALP(smac, u8 *, p, "%pM"); + PRINT_MASKED_VALP(dmac, u8 *, p, "%pM"); + PRINT_MASKED_VAL(ethertype, p, "%04x"); + + if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IP) || + (ip_version.m == 0xf && ip_version.v == 4)) { +#define MASK_VAL_L2_BE(type, name, fld) \ + MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld) + MASK_VAL_L2_BE(u32, src_ipv4, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, dst_ipv4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, + "%pI4"); + PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, + "%pI4"); + } else if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IPV6) || + (ip_version.m == 0xf && ip_version.v == 6)) { + static const struct in6_addr full_ones = { + .in6_u.u6_addr32 = {__constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff)}, + }; + DECLARE_MASK_VAL(struct in6_addr, src_ipv6); + DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); + + memcpy(src_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.m)); + memcpy(dst_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.m)); + memcpy(src_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.v)); + memcpy(dst_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.v)); + + if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "src_ipv6=%pI6 ", + &src_ipv6.v); + if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "dst_ipv6=%pI6 ", + &dst_ipv6.v); + } + +#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\ + MASK_VAL_L2(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + + PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x"); + PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x"); + PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x"); + PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x"); + PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d"); + PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x"); + PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x"); + PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d"); +} + +static void print_misc_parameters_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_MISC(type, name, fld) \ + MASK_VAL(type, fte_match_set_misc, name, mask, value, fld) +#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\ + MASK_VAL_MISC(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + DECLARE_MASK_VAL(u64, gre_key) = { + .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo), + .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)}; + + PRINT_MASKED_VAL(gre_key, p, "%llu"); + PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag, + outer_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag, + inner_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag, + outer_second_svlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag, + inner_second_svlan_tag, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u"); + + PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u"); + PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label, + p, "%x"); + PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label, + p, "%x"); +} + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + trace_seq_printf(p, "[outer] "); + print_lyr_2_4_hdrs(p, mask_outer, value_outer); + } + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + trace_seq_printf(p, "[misc] "); + print_misc_parameters_hdrs(p, mask_misc, value_misc); + } + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + trace_seq_printf(p, "[inner] "); + print_lyr_2_4_hdrs(p, mask_inner, value_inner); + } + trace_seq_putc(p, 0); + return ret; +} + +static const char +*fs_dest_range_field_to_str(enum mlx5_flow_dest_range_field field) +{ + switch (field) { + case MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN: + return "packet len"; + default: + return "unknown dest range field"; + } +} + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id) +{ + const char *ret = trace_seq_buffer_ptr(p); + + switch (dst->type) { + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + trace_seq_printf(p, "uplink\n"); + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + trace_seq_printf(p, "vport=%u\n", dst->vport.num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + trace_seq_printf(p, "ft=%p\n", dst->ft); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + trace_seq_printf(p, "ft_num=%u\n", dst->ft_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_TIR: + trace_seq_printf(p, "tir=%u\n", dst->tir_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + trace_seq_printf(p, "sampler_id=%u\n", dst->sampler_id); + break; + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + trace_seq_printf(p, "counter_id=%u\n", counter_id); + break; + case MLX5_FLOW_DESTINATION_TYPE_PORT: + trace_seq_printf(p, "port\n"); + break; + case MLX5_FLOW_DESTINATION_TYPE_RANGE: + trace_seq_printf(p, "field=%s min=%d max=%d\n", + fs_dest_range_field_to_str(dst->range.field), + dst->range.min, dst->range.max); + break; + case MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE: + trace_seq_printf(p, "flow_table_type=%u id:%u\n", dst->ft->type, + dst->ft->id); + break; + case MLX5_FLOW_DESTINATION_TYPE_NONE: + trace_seq_printf(p, "none\n"); + break; + } + + trace_seq_putc(p, 0); + return ret; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_ft); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_ft); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h new file mode 100644 index 0000000000..ddf1b87f1b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_FS_TP_ + +#include +#include +#include "../fs_core.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \ + vinner, vmisc) \ + parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\ + vinner, vmisc) + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner); + +#define __parse_fs_dst(dst, counter_id) \ + parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id) + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id); + +TRACE_EVENT(mlx5_fs_add_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + __field(u32, level) + __field(u32, type) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + __entry->level = ft->level; + __entry->type = ft->type; + ), + TP_printk("ft=%p id=%u level=%u type=%u \n", + __entry->ft, __entry->id, __entry->level, __entry->type) + ); + +TRACE_EVENT(mlx5_fs_del_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + + ), + TP_printk("ft=%p id=%u\n", + __entry->ft, __entry->id) + ); + +TRACE_EVENT(mlx5_fs_add_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(const struct mlx5_flow_table *, ft) + __field(u32, start_index) + __field(u32, end_index) + __field(u32, id) + __field(u8, mask_enable) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fg = fg; + fs_get_obj(__entry->ft, fg->node.parent); + __entry->start_index = fg->start_index; + __entry->end_index = fg->start_index + fg->max_ftes; + __entry->id = fg->id; + __entry->mask_enable = fg->mask.match_criteria_enable; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + + ), + TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n", + __entry->fg, __entry->ft, __entry->id, + __entry->start_index, __entry->end_index, + __entry->mask_enable, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(u32, id) + ), + TP_fast_assign( + __entry->fg = fg; + __entry->id = fg->id; + + ), + TP_printk("fg=%p id=%u\n", + __entry->fg, __entry->id) + ); + +#define ACTION_FLAGS \ + {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\ + {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ + {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ + {MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\ + {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP, "VLAN_POP"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2, "VLAN_PUSH_2"},\ + {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2, "VLAN_POP_2"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} + +TRACE_EVENT(mlx5_fs_set_fte, + TP_PROTO(const struct fs_fte *fte, int new_fte), + TP_ARGS(fte, new_fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(const struct mlx5_flow_group *, fg) + __field(u32, group_index) + __field(u32, index) + __field(u32, action) + __field(u32, flow_tag) + __field(u32, flow_source) + __field(u8, mask_enable) + __field(int, new_fte) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->new_fte = new_fte; + fs_get_obj(__entry->fg, fte->node.parent); + __entry->group_index = __entry->fg->id; + __entry->index = fte->index; + __entry->action = fte->action.action; + __entry->mask_enable = __entry->fg->mask.match_criteria_enable; + __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + memcpy(__entry->value_outer, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + outer_headers), + sizeof(__entry->value_outer)); + memcpy(__entry->value_inner, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + inner_headers), + sizeof(__entry->value_inner)); + memcpy(__entry->value_misc, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + misc_parameters), + sizeof(__entry->value_misc)); + + ), + TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n", + __entry->new_fte ? "add" : "set", + __entry->fte, __entry->fg, __entry->index, + __entry->group_index, __print_flags(__entry->action, "|", + ACTION_FLAGS), + __entry->flow_tag, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->value_outer, + __entry->value_misc, + __entry->value_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fte, + TP_PROTO(const struct fs_fte *fte), + TP_ARGS(fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(u32, index) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->index = fte->index; + + ), + TP_printk("fte=%p index=%u\n", + __entry->fte, __entry->index) + ); + +TRACE_EVENT(mlx5_fs_add_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + __field(u32, sw_action) + __field(u32, index) + __field(u32, counter_id) + __array(u8, destination, sizeof(struct mlx5_flow_destination)) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + __entry->index = __entry->fte->dests_size - 1; + __entry->sw_action = rule->sw_action; + memcpy(__entry->destination, + &rule->dest_attr, + sizeof(__entry->destination)); + if (rule->dest_attr.type & + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + __entry->counter_id = + rule->dest_attr.counter_id; + ), + TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", + __entry->rule, __entry->fte, __entry->index, + __print_flags(__entry->sw_action, "|", ACTION_FLAGS), + __parse_fs_dst(__entry->destination, __entry->counter_id)) + ); + +TRACE_EVENT(mlx5_fs_del_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + ), + TP_printk("rule=%p fte=%p\n", + __entry->rule, __entry->fte) + ); +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fs_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c new file mode 100644 index 0000000000..85d3bfa078 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -0,0 +1,1215 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define CREATE_TRACE_POINTS +#include "lib/eq.h" +#include "fw_tracer.h" +#include "fw_tracer_tracepoint.h" + +static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) +{ + u32 *string_db_base_address_out = tracer->str_db.base_address_out; + u32 *string_db_size_out = tracer->str_db.size_out; + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + void *mtrc_cap_sp; + int err, i; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CAP, 0, 0); + if (err) { + mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n", + err); + return err; + } + + if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) { + mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n"); + return -ENOTSUPP; + } + + tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver); + tracer->str_db.first_string_trace = + MLX5_GET(mtrc_cap, out, first_string_trace); + tracer->str_db.num_string_trace = + MLX5_GET(mtrc_cap, out, num_string_trace); + tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db); + tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); + tracer->str_db.loaded = false; + + for (i = 0; i < tracer->str_db.num_string_db; i++) { + mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]); + string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param, + mtrc_cap_sp, + string_db_base_address); + string_db_size_out[i] = MLX5_GET(mtrc_string_db_param, + mtrc_cap_sp, string_db_size); + } + + return err; +} + +static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer, + u32 *out, u32 out_size, + u8 trace_owner) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + + MLX5_SET(mtrc_cap, in, trace_owner, trace_owner); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size, + MLX5_REG_MTRC_CAP, 0, 1); +} + +static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + int err; + + err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out), + MLX5_FW_TRACER_ACQUIRE_OWNERSHIP); + if (err) { + mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n", + err); + return err; + } + + tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); + + if (!tracer->owner) + return -EBUSY; + + return 0; +} + +static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer) +{ + u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + + mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out), + MLX5_FW_TRACER_RELEASE_OWNERSHIP); + tracer->owner = false; +} + +static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + struct device *ddev; + dma_addr_t dma; + void *buff; + gfp_t gfp; + int err; + + tracer->buff.size = TRACE_BUFFER_SIZE_BYTE; + + gfp = GFP_KERNEL | __GFP_ZERO; + buff = (void *)__get_free_pages(gfp, + get_order(tracer->buff.size)); + if (!buff) { + err = -ENOMEM; + mlx5_core_warn(dev, "FWTracer: Failed to allocate pages, %d\n", err); + return err; + } + tracer->buff.log_buf = buff; + + ddev = mlx5_core_dma_dev(dev); + dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n", + dma_mapping_error(ddev, dma)); + err = -ENOMEM; + goto free_pages; + } + tracer->buff.dma = dma; + + return 0; + +free_pages: + free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size)); + + return err; +} + +static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + struct device *ddev; + + if (!tracer->buff.log_buf) + return; + + ddev = mlx5_core_dma_dev(dev); + dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE); + free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size)); +} + +static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + int err, inlen, i; + __be64 *mtt; + void *mkc; + u32 *in; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + + sizeof(*mtt) * round_up(TRACER_BUFFER_PAGE_NUM, 2); + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2)); + mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++) + mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, pd, tracer->buff.pdn); + MLX5_SET(mkc, mkc, bsf_octword_size, 0); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); + MLX5_SET(mkc, mkc, translations_octword_size, + DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2)); + MLX5_SET64(mkc, mkc, start_addr, tracer->buff.dma); + MLX5_SET64(mkc, mkc, len, tracer->buff.size); + err = mlx5_core_create_mkey(dev, &tracer->buff.mkey, in, inlen); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to create mkey, %d\n", err); + + kvfree(in); + + return err; +} + +static void mlx5_fw_tracer_free_strings_db(struct mlx5_fw_tracer *tracer) +{ + u32 num_string_db = tracer->str_db.num_string_db; + int i; + + for (i = 0; i < num_string_db; i++) { + kfree(tracer->str_db.buffer[i]); + tracer->str_db.buffer[i] = NULL; + } +} + +static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer) +{ + u32 *string_db_size_out = tracer->str_db.size_out; + u32 num_string_db = tracer->str_db.num_string_db; + int i; + + for (i = 0; i < num_string_db; i++) { + if (!string_db_size_out[i]) + continue; + tracer->str_db.buffer[i] = kzalloc(string_db_size_out[i], GFP_KERNEL); + if (!tracer->str_db.buffer[i]) + goto free_strings_db; + } + + return 0; + +free_strings_db: + mlx5_fw_tracer_free_strings_db(tracer); + return -ENOMEM; +} + +static void +mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + tracer->st_arr.saved_traces_index = 0; + mutex_init(&tracer->st_arr.lock); +} + +static void +mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + mutex_destroy(&tracer->st_arr.lock); +} + +static void mlx5_tracer_read_strings_db(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer, + read_fw_strings_work); + u32 num_of_reads, num_string_db = tracer->str_db.num_string_db; + struct mlx5_core_dev *dev = tracer->dev; + u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0}; + u32 leftovers, offset; + int err = 0, i, j; + u32 *out, outlen; + void *out_value; + + outlen = MLX5_ST_SZ_BYTES(mtrc_stdb) + STRINGS_DB_READ_SIZE_BYTES; + out = kzalloc(outlen, GFP_KERNEL); + if (!out) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < num_string_db; i++) { + if (!tracer->str_db.size_out[i]) + continue; + offset = 0; + MLX5_SET(mtrc_stdb, in, string_db_index, i); + num_of_reads = tracer->str_db.size_out[i] / + STRINGS_DB_READ_SIZE_BYTES; + leftovers = (tracer->str_db.size_out[i] % + STRINGS_DB_READ_SIZE_BYTES) / + STRINGS_DB_LEFTOVER_SIZE_BYTES; + + MLX5_SET(mtrc_stdb, in, read_size, STRINGS_DB_READ_SIZE_BYTES); + for (j = 0; j < num_of_reads; j++) { + MLX5_SET(mtrc_stdb, in, start_offset, offset); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + outlen, MLX5_REG_MTRC_STDB, + 0, 1); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n", + err); + goto out_free; + } + + out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data); + memcpy(tracer->str_db.buffer[i] + offset, out_value, + STRINGS_DB_READ_SIZE_BYTES); + offset += STRINGS_DB_READ_SIZE_BYTES; + } + + /* Strings database is aligned to 64, need to read leftovers*/ + MLX5_SET(mtrc_stdb, in, read_size, + STRINGS_DB_LEFTOVER_SIZE_BYTES); + for (j = 0; j < leftovers; j++) { + MLX5_SET(mtrc_stdb, in, start_offset, offset); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + outlen, MLX5_REG_MTRC_STDB, + 0, 1); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n", + err); + goto out_free; + } + + out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data); + memcpy(tracer->str_db.buffer[i] + offset, out_value, + STRINGS_DB_LEFTOVER_SIZE_BYTES); + offset += STRINGS_DB_LEFTOVER_SIZE_BYTES; + } + } + + tracer->str_db.loaded = true; + +out_free: + kfree(out); +out: + return; +} + +static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + int err; + + MLX5_SET(mtrc_ctrl, in, arm_event, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CTRL, 0, 1); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err); +} + +static const char *VAL_PARM = "%llx"; +static const char *REPLACE_64_VAL_PARM = "%x%x"; +static const char *PARAM_CHAR = "%"; + +static int mlx5_tracer_message_hash(u32 message_id) +{ + return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1); +} + +static struct tracer_string_format *mlx5_tracer_message_insert(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct hlist_head *head = + &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)]; + struct tracer_string_format *cur_string; + + cur_string = kzalloc(sizeof(*cur_string), GFP_KERNEL); + if (!cur_string) + return NULL; + + hlist_add_head(&cur_string->hlist, head); + + return cur_string; +} + +static struct tracer_string_format *mlx5_tracer_get_string(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_string_format *cur_string; + u32 str_ptr, offset; + int i; + + str_ptr = tracer_event->string_event.string_param; + + for (i = 0; i < tracer->str_db.num_string_db; i++) { + if (!tracer->str_db.size_out[i]) + continue; + if (str_ptr > tracer->str_db.base_address_out[i] && + str_ptr < tracer->str_db.base_address_out[i] + + tracer->str_db.size_out[i]) { + offset = str_ptr - tracer->str_db.base_address_out[i]; + /* add it to the hash */ + cur_string = mlx5_tracer_message_insert(tracer, tracer_event); + if (!cur_string) + return NULL; + cur_string->string = (char *)(tracer->str_db.buffer[i] + + offset); + return cur_string; + } + } + + return NULL; +} + +static void mlx5_tracer_clean_message(struct tracer_string_format *str_frmt) +{ + hlist_del(&str_frmt->hlist); + kfree(str_frmt); +} + +static int mlx5_tracer_get_num_of_params(char *str) +{ + char *substr, *pstr = str; + int num_of_params = 0; + + /* replace %llx with %x%x */ + substr = strstr(pstr, VAL_PARM); + while (substr) { + memcpy(substr, REPLACE_64_VAL_PARM, 4); + pstr = substr; + substr = strstr(pstr, VAL_PARM); + } + + /* count all the % characters */ + substr = strstr(str, PARAM_CHAR); + while (substr) { + num_of_params += 1; + str = substr + 1; + substr = strstr(str, PARAM_CHAR); + } + + return num_of_params; +} + +static struct tracer_string_format *mlx5_tracer_message_find(struct hlist_head *head, + u8 event_id, u32 tmsn) +{ + struct tracer_string_format *message; + + hlist_for_each_entry(message, head, hlist) + if (message->event_id == event_id && message->tmsn == tmsn) + return message; + + return NULL; +} + +static struct tracer_string_format *mlx5_tracer_message_get(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct hlist_head *head = + &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)]; + + return mlx5_tracer_message_find(head, tracer_event->event_id, tracer_event->string_event.tmsn); +} + +static void poll_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event, u64 *trace) +{ + u32 timestamp_low, timestamp_mid, timestamp_high, urts; + + tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id); + tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost); + tracer_event->out = trace; + + switch (tracer_event->event_id) { + case TRACER_EVENT_TYPE_TIMESTAMP: + tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP; + urts = MLX5_GET(tracer_timestamp_event, trace, urts); + if (tracer->trc_ver == 0) + tracer_event->timestamp_event.unreliable = !!(urts >> 2); + else + tracer_event->timestamp_event.unreliable = !!(urts & 1); + + timestamp_low = MLX5_GET(tracer_timestamp_event, + trace, timestamp7_0); + timestamp_mid = MLX5_GET(tracer_timestamp_event, + trace, timestamp39_8); + timestamp_high = MLX5_GET(tracer_timestamp_event, + trace, timestamp52_40); + + tracer_event->timestamp_event.timestamp = + ((u64)timestamp_high << 40) | + ((u64)timestamp_mid << 8) | + (u64)timestamp_low; + break; + default: + if (tracer_event->event_id >= tracer->str_db.first_string_trace && + tracer_event->event_id <= tracer->str_db.first_string_trace + + tracer->str_db.num_string_trace) { + tracer_event->type = TRACER_EVENT_TYPE_STRING; + tracer_event->string_event.timestamp = + MLX5_GET(tracer_string_event, trace, timestamp); + tracer_event->string_event.string_param = + MLX5_GET(tracer_string_event, trace, string_param); + tracer_event->string_event.tmsn = + MLX5_GET(tracer_string_event, trace, tmsn); + tracer_event->string_event.tdsn = + MLX5_GET(tracer_string_event, trace, tdsn); + } else { + tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED; + } + break; + } +} + +static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event) +{ + struct tracer_event tracer_event; + u8 event_id; + + event_id = MLX5_GET(tracer_event, ts_event, event_id); + + if (event_id == TRACER_EVENT_TYPE_TIMESTAMP) + poll_trace(tracer, &tracer_event, ts_event); + else + tracer_event.timestamp_event.timestamp = 0; + + return tracer_event.timestamp_event.timestamp; +} + +static void mlx5_fw_tracer_clean_print_hash(struct mlx5_fw_tracer *tracer) +{ + struct tracer_string_format *str_frmt; + struct hlist_node *n; + int i; + + for (i = 0; i < MESSAGE_HASH_SIZE; i++) { + hlist_for_each_entry_safe(str_frmt, n, &tracer->hash[i], hlist) + mlx5_tracer_clean_message(str_frmt); + } +} + +static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer) +{ + struct tracer_string_format *str_frmt, *tmp_str; + + list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, + list) + list_del(&str_frmt->list); +} + +static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer, + u64 timestamp, bool lost, + u8 event_id, char *msg) +{ + struct mlx5_fw_trace_data *trace_data; + + mutex_lock(&tracer->st_arr.lock); + trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index]; + trace_data->timestamp = timestamp; + trace_data->lost = lost; + trace_data->event_id = event_id; + strscpy_pad(trace_data->msg, msg, TRACE_STR_MSG); + + tracer->st_arr.saved_traces_index = + (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1); + mutex_unlock(&tracer->st_arr.lock); +} + +static noinline +void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, + struct mlx5_core_dev *dev, + u64 trace_timestamp) +{ + char tmp[512]; + + snprintf(tmp, sizeof(tmp), str_frmt->string, + str_frmt->params[0], + str_frmt->params[1], + str_frmt->params[2], + str_frmt->params[3], + str_frmt->params[4], + str_frmt->params[5], + str_frmt->params[6]); + + trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, + str_frmt->event_id, tmp); + + mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp, + str_frmt->lost, str_frmt->event_id, tmp); + + /* remove it from hash */ + mlx5_tracer_clean_message(str_frmt); +} + +static int mlx5_tracer_handle_raw_string(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_string_format *cur_string; + + cur_string = mlx5_tracer_message_insert(tracer, tracer_event); + if (!cur_string) + return -1; + + cur_string->event_id = tracer_event->event_id; + cur_string->timestamp = tracer_event->string_event.timestamp; + cur_string->lost = tracer_event->lost_event; + cur_string->string = "0x%08x%08x"; + cur_string->num_of_params = 2; + cur_string->params[0] = upper_32_bits(*tracer_event->out); + cur_string->params[1] = lower_32_bits(*tracer_event->out); + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + return 0; +} + +static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_string_format *cur_string; + + if (tracer_event->string_event.tdsn == 0) { + cur_string = mlx5_tracer_get_string(tracer, tracer_event); + if (!cur_string) + return mlx5_tracer_handle_raw_string(tracer, tracer_event); + + cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); + cur_string->last_param_num = 0; + cur_string->event_id = tracer_event->event_id; + cur_string->tmsn = tracer_event->string_event.tmsn; + cur_string->timestamp = tracer_event->string_event.timestamp; + cur_string->lost = tracer_event->lost_event; + if (cur_string->num_of_params == 0) /* trace with no params */ + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + } else { + cur_string = mlx5_tracer_message_get(tracer, tracer_event); + if (!cur_string) { + pr_debug("%s Got string event for unknown string tmsn: %d\n", + __func__, tracer_event->string_event.tmsn); + return mlx5_tracer_handle_raw_string(tracer, tracer_event); + } + cur_string->last_param_num += 1; + if (cur_string->last_param_num > TRACER_MAX_PARAMS) { + pr_debug("%s Number of params exceeds the max (%d)\n", + __func__, TRACER_MAX_PARAMS); + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + return 0; + } + /* keep the new parameter */ + cur_string->params[cur_string->last_param_num - 1] = + tracer_event->string_event.string_param; + if (cur_string->last_param_num == cur_string->num_of_params) + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + } + + return 0; +} + +static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_timestamp_event timestamp_event = + tracer_event->timestamp_event; + struct tracer_string_format *str_frmt, *tmp_str; + struct mlx5_core_dev *dev = tracer->dev; + u64 trace_timestamp; + + list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, list) { + list_del(&str_frmt->list); + if (str_frmt->timestamp < (timestamp_event.timestamp & MASK_6_0)) + trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | + (str_frmt->timestamp & MASK_6_0); + else + trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | + (str_frmt->timestamp & MASK_6_0); + + mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); + } +} + +static int mlx5_tracer_handle_trace(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + if (tracer_event->type == TRACER_EVENT_TYPE_STRING) { + mlx5_tracer_handle_string_trace(tracer, tracer_event); + } else if (tracer_event->type == TRACER_EVENT_TYPE_TIMESTAMP) { + if (!tracer_event->timestamp_event.unreliable) + mlx5_tracer_handle_timestamp_trace(tracer, tracer_event); + } else { + pr_debug("%s Got unrecognised type %d for parsing, exiting..\n", + __func__, tracer_event->type); + } + return 0; +} + +static void mlx5_fw_tracer_handle_traces(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = + container_of(work, struct mlx5_fw_tracer, handle_traces_work); + u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK]; + u32 block_count, start_offset, prev_start_offset, prev_consumer_index; + u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event); + struct mlx5_core_dev *dev = tracer->dev; + struct tracer_event tracer_event; + int i; + + mlx5_core_dbg(dev, "FWTracer: Handle Trace event, owner=(%d)\n", tracer->owner); + if (!tracer->owner) + return; + + if (unlikely(!tracer->str_db.loaded)) + goto arm; + + block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE; + start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; + + /* Copy the block to local buffer to avoid HW override while being processed */ + memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset, + TRACER_BLOCK_SIZE_BYTE); + + block_timestamp = + get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]); + + while (block_timestamp > tracer->last_timestamp) { + /* Check block override if it's not the first block */ + if (tracer->last_timestamp) { + u64 *ts_event; + /* To avoid block override be the HW in case of buffer + * wraparound, the time stamp of the previous block + * should be compared to the last timestamp handled + * by the driver. + */ + prev_consumer_index = + (tracer->buff.consumer_index - 1) & (block_count - 1); + prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE; + + ts_event = tracer->buff.log_buf + prev_start_offset + + (TRACES_PER_BLOCK - 1) * trace_event_size; + last_block_timestamp = get_block_timestamp(tracer, ts_event); + /* If previous timestamp different from last stored + * timestamp then there is a good chance that the + * current buffer is overwritten and therefore should + * not be parsed. + */ + if (tracer->last_timestamp != last_block_timestamp) { + mlx5_core_warn(dev, "FWTracer: Events were lost\n"); + tracer->last_timestamp = block_timestamp; + tracer->buff.consumer_index = + (tracer->buff.consumer_index + 1) & (block_count - 1); + break; + } + } + + /* Parse events */ + for (i = 0; i < TRACES_PER_BLOCK ; i++) { + poll_trace(tracer, &tracer_event, &tmp_trace_block[i]); + mlx5_tracer_handle_trace(tracer, &tracer_event); + } + + tracer->buff.consumer_index = + (tracer->buff.consumer_index + 1) & (block_count - 1); + + tracer->last_timestamp = block_timestamp; + start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; + memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset, + TRACER_BLOCK_SIZE_BYTE); + block_timestamp = get_block_timestamp(tracer, + &tmp_trace_block[TRACES_PER_BLOCK - 1]); + } + +arm: + mlx5_fw_tracer_arm(dev); +} + +static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0}; + int err; + + MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY); + MLX5_SET(mtrc_conf, in, log_trace_buffer_size, + ilog2(TRACER_BUFFER_PAGE_NUM)); + MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CONF, 0, 1); + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err); + + tracer->buff.consumer_index = 0; + return err; +} + +static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm) +{ + struct mlx5_core_dev *dev = tracer->dev; + u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0}; + int err; + + MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS); + MLX5_SET(mtrc_ctrl, in, trace_status, status); + MLX5_SET(mtrc_ctrl, in, arm_event, arm); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTRC_CTRL, 0, 1); + + if (!err && status) + tracer->last_timestamp = 0; + + return err; +} + +static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev = tracer->dev; + int err; + + err = mlx5_fw_tracer_ownership_acquire(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err); + /* Don't fail since ownership can be acquired on a later FW event */ + return 0; + } + + err = mlx5_fw_tracer_set_mtrc_conf(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err); + goto release_ownership; + } + + /* enable tracer & trace events */ + err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err); + goto release_ownership; + } + + mlx5_core_dbg(dev, "FWTracer: Ownership granted and active\n"); + return 0; + +release_ownership: + mlx5_fw_tracer_ownership_release(tracer); + return err; +} + +static void mlx5_fw_tracer_ownership_change(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = + container_of(work, struct mlx5_fw_tracer, ownership_change_work); + + mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); + if (tracer->owner) { + mlx5_fw_tracer_ownership_acquire(tracer); + return; + } + + mlx5_fw_tracer_start(tracer); +} + +static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev, + u32 *in, int size_in) +{ + u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) && + !MLX5_CAP_DEBUG(dev, core_dump_qp)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out), + MLX5_REG_CORE_DUMP, 0, 1); +} + +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = dev->tracer; + u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + int err; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer) + return -EOPNOTSUPP; + if (!tracer->owner) + return -EPERM; + + MLX5_SET(core_dump_reg, in, core_dump_type, 0x0); + + err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in)); + if (err) + return err; + queue_work(tracer->work_queue, &tracer->handle_traces_work); + flush_workqueue(tracer->work_queue); + return 0; +} + +static int +mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg, + struct mlx5_fw_trace_data *trace_data) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return 0; +} + +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg) +{ + struct mlx5_fw_trace_data *straces = tracer->st_arr.straces; + u32 index, start_index, end_index; + u32 saved_traces_index; + int err; + + if (!straces[0].timestamp) + return -ENOMSG; + + mutex_lock(&tracer->st_arr.lock); + saved_traces_index = tracer->st_arr.saved_traces_index; + if (straces[saved_traces_index].timestamp) + start_index = saved_traces_index; + else + start_index = 0; + end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1); + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces"); + if (err) + goto unlock; + index = start_index; + while (index != end_index) { + err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]); + if (err) + goto unlock; + + index = (index + 1) & (SAVED_TRACES_NUM - 1); + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); +unlock: + mutex_unlock(&tracer->st_arr.lock); + return err; +} + +static void mlx5_fw_tracer_update_db(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = + container_of(work, struct mlx5_fw_tracer, update_db_work); + + mlx5_fw_tracer_reload(tracer); +} + +/* Create software resources (Buffers, etc ..) */ +struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = NULL; + int err; + + if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) { + mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n"); + return NULL; + } + + tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + return ERR_PTR(-ENOMEM); + + tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer"); + if (!tracer->work_queue) { + err = -ENOMEM; + goto free_tracer; + } + + tracer->dev = dev; + + INIT_LIST_HEAD(&tracer->ready_strings_list); + INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change); + INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db); + INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces); + INIT_WORK(&tracer->update_db_work, mlx5_fw_tracer_update_db); + mutex_init(&tracer->state_lock); + + + err = mlx5_query_mtrc_caps(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err); + goto destroy_workqueue; + } + + err = mlx5_fw_tracer_create_log_buf(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Create log buffer failed %d\n", err); + goto destroy_workqueue; + } + + err = mlx5_fw_tracer_allocate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Allocate strings database failed %d\n", err); + goto free_log_buf; + } + + mlx5_fw_tracer_init_saved_traces_array(tracer); + mlx5_core_dbg(dev, "FWTracer: Tracer created\n"); + + return tracer; + +free_log_buf: + mlx5_fw_tracer_destroy_log_buf(tracer); +destroy_workqueue: + tracer->dev = NULL; + destroy_workqueue(tracer->work_queue); +free_tracer: + kvfree(tracer); + return ERR_PTR(err); +} + +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data); + +/* Create HW resources + start tracer */ +int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + if (IS_ERR_OR_NULL(tracer)) + return 0; + + if (!tracer->str_db.loaded) + queue_work(tracer->work_queue, &tracer->read_fw_strings_work); + + mutex_lock(&tracer->state_lock); + if (test_and_set_bit(MLX5_TRACER_STATE_UP, &tracer->state)) + goto unlock; + + dev = tracer->dev; + + err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err); + goto err_cancel_work; + } + + err = mlx5_fw_tracer_create_mkey(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to create mkey %d\n", err); + goto err_dealloc_pd; + } + + MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER); + mlx5_eq_notifier_register(dev, &tracer->nb); + + err = mlx5_fw_tracer_start(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err); + goto err_notifier_unregister; + } +unlock: + mutex_unlock(&tracer->state_lock); + return 0; + +err_notifier_unregister: + mlx5_eq_notifier_unregister(dev, &tracer->nb); + mlx5_core_destroy_mkey(dev, tracer->buff.mkey); +err_dealloc_pd: + mlx5_core_dealloc_pd(dev, tracer->buff.pdn); +err_cancel_work: + cancel_work_sync(&tracer->read_fw_strings_work); + mutex_unlock(&tracer->state_lock); + return err; +} + +/* Stop tracer + Cleanup HW resources */ +void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) +{ + if (IS_ERR_OR_NULL(tracer)) + return; + + mutex_lock(&tracer->state_lock); + if (!test_and_clear_bit(MLX5_TRACER_STATE_UP, &tracer->state)) + goto unlock; + + mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n", + tracer->owner); + mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb); + cancel_work_sync(&tracer->ownership_change_work); + cancel_work_sync(&tracer->handle_traces_work); + /* It is valid to get here from update_db_work. Hence, don't wait for + * update_db_work to finished. + */ + cancel_work(&tracer->update_db_work); + + if (tracer->owner) + mlx5_fw_tracer_ownership_release(tracer); + + mlx5_core_destroy_mkey(tracer->dev, tracer->buff.mkey); + mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn); +unlock: + mutex_unlock(&tracer->state_lock); +} + +/* Free software resources (Buffers, etc ..) */ +void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) +{ + if (IS_ERR_OR_NULL(tracer)) + return; + + mlx5_core_dbg(tracer->dev, "FWTracer: Destroy\n"); + + cancel_work_sync(&tracer->read_fw_strings_work); + mlx5_fw_tracer_clean_ready_list(tracer); + mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); + mlx5_fw_tracer_free_strings_db(tracer); + mlx5_fw_tracer_destroy_log_buf(tracer); + mutex_destroy(&tracer->state_lock); + destroy_workqueue(tracer->work_queue); + kvfree(tracer); +} + +static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + if (test_and_set_bit(MLX5_TRACER_RECREATE_DB, &tracer->state)) + return 0; + cancel_work_sync(&tracer->read_fw_strings_work); + mlx5_fw_tracer_clean_ready_list(tracer); + mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); + mlx5_fw_tracer_free_strings_db(tracer); + + dev = tracer->dev; + err = mlx5_query_mtrc_caps(tracer); + if (err) { + mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err); + goto out; + } + + err = mlx5_fw_tracer_allocate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err); + goto out; + } + mlx5_fw_tracer_init_saved_traces_array(tracer); +out: + clear_bit(MLX5_TRACER_RECREATE_DB, &tracer->state); + return err; +} + +int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer) +{ + struct mlx5_core_dev *dev; + int err; + + if (IS_ERR_OR_NULL(tracer)) + return 0; + + dev = tracer->dev; + mlx5_fw_tracer_cleanup(tracer); + err = mlx5_fw_tracer_recreate_strings_db(tracer); + if (err) { + mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n"); + return err; + } + err = mlx5_fw_tracer_init(tracer); + if (err) { + mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n"); + return err; + } + + return 0; +} + +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) +{ + struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb); + struct mlx5_core_dev *dev = tracer->dev; + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE: + queue_work(tracer->work_queue, &tracer->ownership_change_work); + break; + case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: + queue_work(tracer->work_queue, &tracer->handle_traces_work); + break; + case MLX5_TRACER_SUBTYPE_STRINGS_DB_UPDATE: + queue_work(tracer->work_queue, &tracer->update_db_work); + break; + default: + mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", + eqe->sub_type); + } + + return NOTIFY_OK; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h new file mode 100644 index 0000000000..5c548bb74f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_TRACER_H__ +#define __LIB_TRACER_H__ + +#include +#include "mlx5_core.h" + +#define STRINGS_DB_SECTIONS_NUM 8 +#define STRINGS_DB_READ_SIZE_BYTES 256 +#define STRINGS_DB_LEFTOVER_SIZE_BYTES 64 +#define TRACER_BUFFER_PAGE_NUM 64 +#define TRACER_BUFFER_CHUNK 4096 +#define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK) + +#define TRACER_BLOCK_SIZE_BYTE 256 +#define TRACES_PER_BLOCK 32 + +#define TRACE_STR_MSG 256 +#define SAVED_TRACES_NUM 8192 + +#define TRACER_MAX_PARAMS 7 +#define MESSAGE_HASH_BITS 6 +#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS) + +#define MASK_52_7 (0x1FFFFFFFFFFF80) +#define MASK_6_0 (0x7F) + +struct mlx5_fw_trace_data { + u64 timestamp; + bool lost; + u8 event_id; + char msg[TRACE_STR_MSG]; +}; + +enum mlx5_fw_tracer_state { + MLX5_TRACER_STATE_UP = BIT(0), + MLX5_TRACER_RECREATE_DB = BIT(1), +}; + +struct mlx5_fw_tracer { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + bool owner; + u8 trc_ver; + struct workqueue_struct *work_queue; + struct work_struct ownership_change_work; + struct work_struct read_fw_strings_work; + + /* Strings DB */ + struct { + u8 first_string_trace; + u8 num_string_trace; + u32 num_string_db; + u32 base_address_out[STRINGS_DB_SECTIONS_NUM]; + u32 size_out[STRINGS_DB_SECTIONS_NUM]; + void *buffer[STRINGS_DB_SECTIONS_NUM]; + bool loaded; + } str_db; + + /* Log Buffer */ + struct { + u32 pdn; + void *log_buf; + dma_addr_t dma; + u32 size; + u32 mkey; + u32 consumer_index; + } buff; + + /* Saved Traces Array */ + struct { + struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM]; + u32 saved_traces_index; + struct mutex lock; /* Protect st_arr access */ + } st_arr; + + u64 last_timestamp; + struct work_struct handle_traces_work; + struct hlist_head hash[MESSAGE_HASH_SIZE]; + struct list_head ready_strings_list; + struct work_struct update_db_work; + struct mutex state_lock; /* Synchronize update work with reload flows */ + unsigned long state; +}; + +struct tracer_string_format { + char *string; + int params[TRACER_MAX_PARAMS]; + int num_of_params; + int last_param_num; + u8 event_id; + u32 tmsn; + struct hlist_node hlist; + struct list_head list; + u32 timestamp; + bool lost; +}; + +enum mlx5_fw_tracer_ownership_state { + MLX5_FW_TRACER_RELEASE_OWNERSHIP, + MLX5_FW_TRACER_ACQUIRE_OWNERSHIP, +}; + +enum tracer_ctrl_fields_select { + TRACE_STATUS = 1 << 0, +}; + +enum tracer_event_type { + TRACER_EVENT_TYPE_STRING, + TRACER_EVENT_TYPE_TIMESTAMP = 0xFF, + TRACER_EVENT_TYPE_UNRECOGNIZED, +}; + +enum tracing_mode { + TRACE_TO_MEMORY = 1 << 0, +}; + +struct tracer_timestamp_event { + u64 timestamp; + u8 unreliable; +}; + +struct tracer_string_event { + u32 timestamp; + u32 tmsn; + u32 tdsn; + u32 string_param; +}; + +struct tracer_event { + bool lost_event; + u32 type; + u8 event_id; + union { + struct tracer_string_event string_event; + struct tracer_timestamp_event timestamp_event; + }; + u64 *out; +}; + +struct mlx5_ifc_tracer_event_bits { + u8 lost[0x1]; + u8 timestamp[0x7]; + u8 event_id[0x8]; + u8 event_data[0x30]; +}; + +struct mlx5_ifc_tracer_string_event_bits { + u8 lost[0x1]; + u8 timestamp[0x7]; + u8 event_id[0x8]; + u8 tmsn[0xd]; + u8 tdsn[0x3]; + u8 string_param[0x20]; +}; + +struct mlx5_ifc_tracer_timestamp_event_bits { + u8 timestamp7_0[0x8]; + u8 event_id[0x8]; + u8 urts[0x3]; + u8 timestamp52_40[0xd]; + u8 timestamp39_8[0x20]; +}; + +struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); +void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); +void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg); +int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h new file mode 100644 index 0000000000..3038be5759 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(__LIB_TRACER_TRACEPOINT_H__) || defined(TRACE_HEADER_MULTI_READ) +#define __LIB_TRACER_TRACEPOINT_H__ + +#include +#include "fw_tracer.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +/* Tracepoint for FWTracer messages: */ +TRACE_EVENT(mlx5_fw, + TP_PROTO(const struct mlx5_fw_tracer *tracer, u64 trace_timestamp, + bool lost, u8 event_id, const char *msg), + + TP_ARGS(tracer, trace_timestamp, lost, event_id, msg), + + TP_STRUCT__entry( + __string(dev_name, dev_name(tracer->dev->device)) + __field(u64, trace_timestamp) + __field(bool, lost) + __field(u8, event_id) + __string(msg, msg) + ), + + TP_fast_assign( + __assign_str(dev_name, + dev_name(tracer->dev->device)); + __entry->trace_timestamp = trace_timestamp; + __entry->lost = lost; + __entry->event_id = event_id; + __assign_str(msg, msg); + ), + + TP_printk("%s [0x%llx] %d [0x%x] %s", + __get_str(dev_name), + __entry->trace_timestamp, + __entry->lost, __entry->event_id, + __get_str(msg)) +); + +#endif + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ./diag +#define TRACE_INCLUDE_FILE fw_tracer_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c new file mode 100644 index 0000000000..e869c65d8e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */ + +#include "reporter_vnic.h" +#include "en_stats.h" +#include "devlink.h" + +#define VNIC_ENV_GET64(vnic_env_stats, c) \ + MLX5_GET64(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \ + vport_env.c) + +struct mlx5_vnic_diag_stats { + __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; +}; + +int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport) +{ + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + struct mlx5_vnic_diag_stats vnic; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, vport_number, vport_num); + MLX5_SET(query_vnic_env_in, in, other_vport, !!other_vport); + + err = mlx5_cmd_exec_inout(dev, query_vnic_env, in, &vnic.query_vnic_env_out); + if (err) + return err; + + err = devlink_fmsg_pair_nest_start(fmsg, "vNIC env counters"); + if (err) + return err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, vnic_env_queue_counters)) { + err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues", + VNIC_ENV_GET(&vnic, total_error_queues)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "send_queue_priority_update_flow", + VNIC_ENV_GET(&vnic, + send_queue_priority_update_flow)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, eq_overrun_count)) { + err = devlink_fmsg_u32_pair_put(fmsg, "comp_eq_overrun", + VNIC_ENV_GET(&vnic, comp_eq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "async_eq_overrun", + VNIC_ENV_GET(&vnic, async_eq_overrun)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vnic_env_cq_overrun)) { + err = devlink_fmsg_u32_pair_put(fmsg, "cq_overrun", + VNIC_ENV_GET(&vnic, cq_overrun)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, invalid_command_count)) { + err = devlink_fmsg_u32_pair_put(fmsg, "invalid_command", + VNIC_ENV_GET(&vnic, invalid_command)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, quota_exceeded_count)) { + err = devlink_fmsg_u32_pair_put(fmsg, "quota_exceeded_command", + VNIC_ENV_GET(&vnic, quota_exceeded_command)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_receive_steering_discard)) { + err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard", + VNIC_ENV_GET64(&vnic, + nic_receive_steering_discard)); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vnic_env_cnt_steering_fail)) { + err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail", + VNIC_ENV_GET64(&vnic, + generated_pkt_steering_fail)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail", + VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail)); + if (err) + return err; + } + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5_reporter_vnic_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_reporter_vnic_diagnose_counters(dev, fmsg, 0, false); +} + +static const struct devlink_health_reporter_ops mlx5_reporter_vnic_ops = { + .name = "vnic", + .diagnose = mlx5_reporter_vnic_diagnose, +}; + +void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->vnic_reporter = + devlink_health_reporter_create(devlink, + &mlx5_reporter_vnic_ops, + 0, dev); + if (IS_ERR(health->vnic_reporter)) + mlx5_core_warn(dev, + "Failed to create vnic reporter, err = %ld\n", + PTR_ERR(health->vnic_reporter)); +} + +void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->vnic_reporter)) + devlink_health_reporter_destroy(health->vnic_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h new file mode 100644 index 0000000000..eba87a39e9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef __MLX5_REPORTER_VNIC_H +#define __MLX5_REPORTER_VNIC_H + +#include "mlx5_core.h" + +void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev); +void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev); + +int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport); + +#endif /* __MLX5_REPORTER_VNIC_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c new file mode 100644 index 0000000000..c5b560a8b0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rsc_dump.h" +#include "lib/mlx5.h" + +#define MLX5_SGMT_TYPE(SGMT) MLX5_SGMT_TYPE_##SGMT +#define MLX5_SGMT_STR_ASSING(SGMT)[MLX5_SGMT_TYPE(SGMT)] = #SGMT +static const char *const mlx5_rsc_sgmt_name[] = { + MLX5_SGMT_STR_ASSING(HW_CQPC), + MLX5_SGMT_STR_ASSING(HW_SQPC), + MLX5_SGMT_STR_ASSING(HW_RQPC), + MLX5_SGMT_STR_ASSING(FULL_SRQC), + MLX5_SGMT_STR_ASSING(FULL_CQC), + MLX5_SGMT_STR_ASSING(FULL_EQC), + MLX5_SGMT_STR_ASSING(FULL_QPC), + MLX5_SGMT_STR_ASSING(SND_BUFF), + MLX5_SGMT_STR_ASSING(RCV_BUFF), + MLX5_SGMT_STR_ASSING(SRQ_BUFF), + MLX5_SGMT_STR_ASSING(CQ_BUFF), + MLX5_SGMT_STR_ASSING(EQ_BUFF), + MLX5_SGMT_STR_ASSING(SX_SLICE), + MLX5_SGMT_STR_ASSING(SX_SLICE_ALL), + MLX5_SGMT_STR_ASSING(RDB), + MLX5_SGMT_STR_ASSING(RX_SLICE_ALL), + MLX5_SGMT_STR_ASSING(PRM_QUERY_QP), + MLX5_SGMT_STR_ASSING(PRM_QUERY_CQ), + MLX5_SGMT_STR_ASSING(PRM_QUERY_MKEY), +}; + +struct mlx5_rsc_dump { + u32 pdn; + u32 mkey; + u32 number_of_menu_items; + u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; +}; + +struct mlx5_rsc_dump_cmd { + u64 mem_size; + u8 cmd[MLX5_ST_SZ_BYTES(resource_dump)]; +}; + +static int mlx5_rsc_dump_sgmt_get_by_name(char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mlx5_rsc_sgmt_name); i++) + if (!strcmp(name, mlx5_rsc_sgmt_name[i])) + return i; + + return -EINVAL; +} + +#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_menu_segment)) + +static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page, + int read_size, int start_idx) +{ + void *data = page_address(page); + enum mlx5_sgmt_type sgmt_idx; + int num_of_items; + char *sgmt_name; + void *member; + int size = 0; + void *menu; + int i; + + if (!start_idx) { + menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); + rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu, + num_of_records); + size = MLX5_RSC_DUMP_MENU_HEADER_SIZE; + data += size; + } + num_of_items = rsc_dump->number_of_menu_items; + + for (i = 0; start_idx + i < num_of_items; i++) { + size += MLX5_ST_SZ_BYTES(resource_dump_menu_record); + if (size >= read_size) + return start_idx + i; + + member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i; + sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name); + sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name); + if (sgmt_idx == -EINVAL) + continue; + rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record, + member, segment_type); + } + return 0; +} + +static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page) +{ + struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; + struct device *ddev = mlx5_core_dma_dev(dev); + u32 out_seq_num; + u32 in_seq_num; + dma_addr_t dma; + int err; + + dma = dma_map_page(ddev, page, 0, cmd->mem_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(ddev, dma))) + return -ENOMEM; + + in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); + MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey); + MLX5_SET64(resource_dump, cmd->cmd, address, dma); + + err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd, + sizeof(cmd->cmd), MLX5_REG_RESOURCE_DUMP, 0, 1); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to access err %d\n", err); + goto out; + } + out_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num); + if (out_seq_num && (in_seq_num + 1 != out_seq_num)) + err = -EIO; +out: + dma_unmap_page(ddev, dma, cmd->mem_size, DMA_FROM_DEVICE); + return err; +} + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key) +{ + struct mlx5_rsc_dump_cmd *cmd; + int sgmt_type; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return ERR_PTR(-EOPNOTSUPP); + + sgmt_type = dev->rsc_dump->fw_segment_type[key->rsc]; + if (!sgmt_type && key->rsc != MLX5_SGMT_TYPE_MENU) + return ERR_PTR(-EOPNOTSUPP); + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) { + mlx5_core_err(dev, "Resource dump: Failed to allocate command\n"); + return ERR_PTR(-ENOMEM); + } + MLX5_SET(resource_dump, cmd->cmd, segment_type, sgmt_type); + MLX5_SET(resource_dump, cmd->cmd, index1, key->index1); + MLX5_SET(resource_dump, cmd->cmd, index2, key->index2); + MLX5_SET(resource_dump, cmd->cmd, num_of_obj1, key->num_of_obj1); + MLX5_SET(resource_dump, cmd->cmd, num_of_obj2, key->num_of_obj2); + MLX5_SET(resource_dump, cmd->cmd, size, key->size); + cmd->mem_size = key->size; + return cmd; +} +EXPORT_SYMBOL(mlx5_rsc_dump_cmd_create); + +void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd) +{ + kfree(cmd); +} +EXPORT_SYMBOL(mlx5_rsc_dump_cmd_destroy); + +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size) +{ + bool more_dump; + int err; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return -EOPNOTSUPP; + + err = mlx5_rsc_dump_trigger(dev, cmd, page); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to trigger dump, %d\n", err); + return err; + } + *size = MLX5_GET(resource_dump, cmd->cmd, size); + more_dump = MLX5_GET(resource_dump, cmd->cmd, more_dump); + + return more_dump; +} +EXPORT_SYMBOL(mlx5_rsc_dump_next); + +#define MLX5_RSC_DUMP_MENU_SEGMENT 0xffff +static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump_cmd *cmd = NULL; + struct mlx5_rsc_key key = {}; + struct page *page; + int start_idx = 0; + int size; + int err; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + key.rsc = MLX5_SGMT_TYPE_MENU; + key.size = PAGE_SIZE; + cmd = mlx5_rsc_dump_cmd_create(dev, &key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + MLX5_SET(resource_dump, cmd->cmd, segment_type, MLX5_RSC_DUMP_MENU_SEGMENT); + + do { + err = mlx5_rsc_dump_next(dev, cmd, page, &size); + if (err < 0) + goto destroy_cmd; + + start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx); + + } while (err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); +free_page: + __free_page(page); + + return err; +} + +static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + u32 *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump *rsc_dump; + + if (!MLX5_CAP_DEBUG(dev, resource_dump)) { + mlx5_core_dbg(dev, "Resource dump: capability not present\n"); + return NULL; + } + rsc_dump = kzalloc(sizeof(*rsc_dump), GFP_KERNEL); + if (!rsc_dump) + return ERR_PTR(-ENOMEM); + + return rsc_dump; +} + +void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev) +{ + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return; + kfree(dev->rsc_dump); +} + +int mlx5_rsc_dump_init(struct mlx5_core_dev *dev) +{ + struct mlx5_rsc_dump *rsc_dump = dev->rsc_dump; + int err; + + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return 0; + + err = mlx5_core_alloc_pd(dev, &rsc_dump->pdn); + if (err) { + mlx5_core_warn(dev, "Resource dump: Failed to allocate PD %d\n", err); + return err; + } + err = mlx5_rsc_dump_create_mkey(dev, rsc_dump->pdn, &rsc_dump->mkey); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to create mkey, %d\n", err); + goto free_pd; + } + err = mlx5_rsc_dump_menu(dev); + if (err) { + mlx5_core_err(dev, "Resource dump: Failed to read menu, %d\n", err); + goto destroy_mkey; + } + return err; + +destroy_mkey: + mlx5_core_destroy_mkey(dev, rsc_dump->mkey); +free_pd: + mlx5_core_dealloc_pd(dev, rsc_dump->pdn); + return err; +} + +void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev) +{ + if (IS_ERR_OR_NULL(dev->rsc_dump)) + return; + + mlx5_core_destroy_mkey(dev, dev->rsc_dump->mkey); + mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h new file mode 100644 index 0000000000..64c4956db6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_RSC_DUMP_H +#define __MLX5_RSC_DUMP_H + +#include +#include +#include "mlx5_core.h" + +#define MLX5_RSC_DUMP_ALL 0xFFFF +struct mlx5_rsc_dump_cmd; +struct mlx5_rsc_dump; + +struct mlx5_rsc_dump *mlx5_rsc_dump_create(struct mlx5_core_dev *dev); +void mlx5_rsc_dump_destroy(struct mlx5_core_dev *dev); + +int mlx5_rsc_dump_init(struct mlx5_core_dev *dev); +void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev); + +struct mlx5_rsc_dump_cmd *mlx5_rsc_dump_cmd_create(struct mlx5_core_dev *dev, + struct mlx5_rsc_key *key); +void mlx5_rsc_dump_cmd_destroy(struct mlx5_rsc_dump_cmd *cmd); + +int mlx5_rsc_dump_next(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, + struct page *page, int *size); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c new file mode 100644 index 0000000000..d000236ddb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "ecpf.h" + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; +} + +static bool mlx5_ecpf_esw_admins_host_pf(const struct mlx5_core_dev *dev) +{ + /* In separate host mode, PF enables itself. + * When ECPF is eswitch manager, eswitch enables host PF after + * eswitch is setup. + */ + return mlx5_core_is_ecpf_esw_manager(dev); +} + +int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, 0); + MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_host_pf_init(struct mlx5_core_dev *dev) +{ + int err; + + if (mlx5_ecpf_esw_admins_host_pf(dev)) + return 0; + + /* ECPF shall enable HCA for host PF in the same way a PF + * does this for its VFs when ECPF is not a eswitch manager. + */ + err = mlx5_cmd_host_pf_enable_hca(dev); + if (err) + mlx5_core_err(dev, "Failed to enable external host PF HCA err(%d)\n", err); + + return err; +} + +static void mlx5_host_pf_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + if (mlx5_ecpf_esw_admins_host_pf(dev)) + return; + + err = mlx5_cmd_host_pf_disable_hca(dev); + if (err) { + mlx5_core_err(dev, "Failed to disable external host PF HCA err(%d)\n", err); + return; + } +} + +int mlx5_ec_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return 0; + + return mlx5_host_pf_init(dev); +} + +void mlx5_ec_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_host_pf_cleanup(dev); + + err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err); + + err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF]); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming external host VFs pages err(%d)\n", err); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h new file mode 100644 index 0000000000..40b6ad76dc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_ECPF_H__ +#define __MLX5_ECPF_H__ + +#include +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5_ECPU_BIT_NUM = 23, +}; + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); +int mlx5_ec_init(struct mlx5_core_dev *dev); +void mlx5_ec_cleanup(struct mlx5_core_dev *dev); + +int mlx5_cmd_host_pf_enable_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_host_pf_disable_hca(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline bool +mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } +static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ECPF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h new file mode 100644 index 0000000000..20a6bc1a23 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -0,0 +1,1221 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_H__ +#define __MLX5_EN_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "wq.h" +#include "mlx5_core.h" +#include "en_stats.h" +#include "en/dcbnl.h" +#include "en/fs.h" +#include "en/qos.h" +#include "lib/hv_vhca.h" +#include "lib/clock.h" +#include "en/rx_res.h" +#include "en/selq.h" + +extern const struct net_device_ops mlx5e_netdev_ops; +struct page_pool; + +#define MLX5E_METADATA_ETHER_TYPE (0x8CE4) +#define MLX5E_METADATA_ETHER_LEN 8 + +#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) + +#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) +#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) + +#define MLX5E_MAX_NUM_TC 8 +#define MLX5E_MAX_NUM_MQPRIO_CH_TC TC_QOPT_MAX_QUEUE + +#define MLX5_RX_HEADROOM NET_SKB_PAD +#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + +#define MLX5E_RX_MAX_HEAD (256) +#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9) +#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) +#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64) +#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024) +#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096) + +#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ + (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \ + max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) +#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ + MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) + +#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 + +/* Keep in sync with mlx5e_mpwrq_log_wqe_sz. + * These are theoretical maximums, which can be further restricted by + * capabilities. These values are used for static resource allocations and + * sanity checks. + * MLX5_SEND_WQE_MAX_SIZE is a bit bigger than the maximum cacheline-aligned WQE + * size actually used at runtime, but it's not a problem when calculating static + * array sizes. + */ +#define MLX5_UMR_MAX_FLEX_SPACE \ + (ALIGN_DOWN(MLX5_SEND_WQE_MAX_SIZE - sizeof(struct mlx5e_umr_wqe), \ + MLX5_UMR_FLEX_ALIGNMENT)) +#define MLX5_MPWRQ_MAX_PAGES_PER_WQE \ + rounddown_pow_of_two(MLX5_UMR_MAX_FLEX_SPACE / sizeof(struct mlx5_mtt)) + +#define MLX5E_MAX_RQ_NUM_MTTS \ + (ALIGN_DOWN(U16_MAX, 4) * 2) /* Fits into u16 and aligned by WQEBB. */ +#define MLX5E_MAX_RQ_NUM_KSMS (U16_MAX - 1) /* So that num_ksms fits into u16. */ +#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) + +#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM)) +#define MLX5E_LOG_MAX_RX_WQE_BULK \ + (ilog2(PAGE_SIZE / roundup_pow_of_two(MLX5E_MIN_SKB_FRAG_SZ))) + +#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 +#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK) +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 + +#define MLX5E_DEFAULT_LRO_TIMEOUT 32 +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 + +#define MLX5E_MIN_NUM_CHANNELS 0x1 +#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) +#define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_TX_XSK_POLL_BUDGET 64 +#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ + +#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\ + (sizeof(struct mlx5e_umr_wqe) +\ + (sizeof(struct mlx5_klm) * (sgl_len))) + +#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \ + (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB)) + +#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\ + (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS)) + +#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\ + (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm)) + +#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ + ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT) + +#define MLX5E_MAX_KLM_PER_WQE(mdev) \ + MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) + +#define mlx5e_state_dereference(priv, p) \ + rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) + +static inline u8 mlx5e_get_num_lag_ports(struct mlx5_core_dev *mdev) +{ + if (mlx5_lag_is_lacp_owner(mdev)) + return 1; + + return clamp_t(u8, MLX5_CAP_GEN(mdev, num_lag_ports), 1, MLX5_MAX_PORTS); +} + +static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW, + wq_size / 2); + default: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES, + wq_size / 2); + } +} + +/* Use this function to get max num channels (rxqs/txqs) only to create netdev */ +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? + MLX5E_MIN_NUM_CHANNELS : + min_t(int, mlx5_comp_vectors_max(mdev), MLX5E_MAX_NUM_CHANNELS); +} + +/* The maximum WQE size can be retrieved by max_wqe_sz_sq in + * bytes units. Driver hardens the limitation to 1KB (16 + * WQEBBs), unless firmware capability is stricter. + */ +static inline u8 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) +{ + BUILD_BUG_ON(MLX5_SEND_WQE_MAX_WQEBBS > U8_MAX); + + return (u8)min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS, + MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); +} + +static inline u8 mlx5e_get_max_sq_aligned_wqebbs(struct mlx5_core_dev *mdev) +{ +/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS. + * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16, + * see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64) + * overflows the 6-bit DS field of Ctrl Segment. Use a bound lower + * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be + * cache-aligned. + */ + u8 wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + + wqebbs = min_t(u8, wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); +#if L1_CACHE_BYTES >= 128 + wqebbs = ALIGN_DOWN(wqebbs, 2); +#endif + return wqebbs; +} + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[]; +}; + +struct mlx5e_rx_wqe_ll { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data[]; +}; + +struct mlx5e_rx_wqe_cyc { + DECLARE_FLEX_ARRAY(struct mlx5_wqe_data_seg, data); +}; + +struct mlx5e_umr_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + union { + DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts); + DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms); + DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms); + }; +}; + +enum mlx5e_priv_flag { + MLX5E_PFLAG_RX_CQE_BASED_MODER, + MLX5E_PFLAG_TX_CQE_BASED_MODER, + MLX5E_PFLAG_RX_CQE_COMPRESS, + MLX5E_PFLAG_RX_STRIDING_RQ, + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, + MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5E_PFLAG_SKB_TX_MPWQE, + MLX5E_PFLAG_TX_PORT_TS, + MLX5E_NUM_PFLAGS, /* Keep last */ +}; + +#define MLX5E_SET_PFLAG(params, pflag, enable) \ + do { \ + if (enable) \ + (params)->pflags |= BIT(pflag); \ + else \ + (params)->pflags &= ~(BIT(pflag)); \ + } while (0) + +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag)))) + +enum packet_merge { + MLX5E_PACKET_MERGE_NONE, + MLX5E_PACKET_MERGE_LRO, + MLX5E_PACKET_MERGE_SHAMPO, +}; + +struct mlx5e_packet_merge_param { + enum packet_merge type; + u32 timeout; + struct { + u8 match_criteria_type; + u8 alignment_granularity; + } shampo; +}; + +struct mlx5e_params { + u8 log_sq_size; + u8 rq_wq_type; + u8 log_rq_mtu_frames; + u16 num_channels; + struct { + u16 mode; + u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + struct { + u64 max_rate[TC_MAX_QUEUE]; + u32 hw_id[TC_MAX_QUEUE]; + } channel; + } mqprio; + bool rx_cqe_compress_def; + struct dim_cq_moder rx_cq_moderation; + struct dim_cq_moder tx_cq_moderation; + struct mlx5e_packet_merge_param packet_merge; + u8 tx_min_inline_mode; + bool vlan_strip_disable; + bool scatter_fcs_en; + bool rx_dim_enabled; + bool tx_dim_enabled; + u32 pflags; + struct bpf_prog *xdp_prog; + struct mlx5e_xsk *xsk; + unsigned int sw_mtu; + int hard_mtu; + bool ptp_rx; + __be32 terminate_lkey_be; +}; + +static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) +{ + return params->mqprio.mode == TC_MQPRIO_MODE_DCB ? + params->mqprio.num_tc : 1; +} + +/* Keep this enum consistent with the corresponding strings array + * declared in en/reporter_rx.c + */ +enum { + MLX5E_RQ_STATE_ENABLED = 0, + MLX5E_RQ_STATE_RECOVERING, + MLX5E_RQ_STATE_DIM, + MLX5E_RQ_STATE_NO_CSUM_COMPLETE, + MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ + MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ + MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ + MLX5E_RQ_STATE_MINI_CQE_ENHANCED, /* set when enhanced mini_cqe_cap is used */ + MLX5E_RQ_STATE_XSK, /* set to indicate an xsk rq */ + MLX5E_NUM_RQ_STATES, /* Must be kept last */ +}; + +struct mlx5e_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + + /* data path - accessed per napi poll */ + u16 event_ctr; + struct napi_struct *napi; + struct mlx5_core_cq mcq; + struct mlx5e_ch_stats *ch_stats; + + /* control */ + struct net_device *netdev; + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_cq_decomp { + /* cqe decompression */ + struct mlx5_cqe64 title; + struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE]; + u8 mini_arr_idx; + u16 left; + u16 wqe_counter; + bool last_cqe_title; +} ____cacheline_aligned_in_smp; + +enum mlx5e_dma_map_type { + MLX5E_DMA_MAP_SINGLE, + MLX5E_DMA_MAP_PAGE +}; + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; + enum mlx5e_dma_map_type type; +}; + +/* Keep this enum consistent with with the corresponding strings array + * declared in en/reporter_tx.c + */ +enum { + MLX5E_SQ_STATE_ENABLED = 0, + MLX5E_SQ_STATE_MPWQE, + MLX5E_SQ_STATE_RECOVERING, + MLX5E_SQ_STATE_IPSEC, + MLX5E_SQ_STATE_DIM, + MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, + MLX5E_SQ_STATE_PENDING_XSK_TX, + MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, + MLX5E_SQ_STATE_XDP_MULTIBUF, + MLX5E_NUM_SQ_STATES, /* Must be kept last */ +}; + +struct mlx5e_tx_mpwqe { + /* Current MPWQE session */ + struct mlx5e_tx_wqe *wqe; + u32 bytes_count; + u8 ds_count; + u8 pkt_count; + u8 inline_on; +}; + +struct mlx5e_skb_fifo { + struct sk_buff **fifo; + u16 *pc; + u16 *cc; + u16 mask; +}; + +struct mlx5e_ptpsq; + +struct mlx5e_txqsq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u16 skb_fifo_cc; + u32 dma_fifo_cc; + struct dim dim; /* Adaptive Moderation */ + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u16 skb_fifo_pc; + u32 dma_fifo_pc; + struct mlx5e_tx_mpwqe mpwqe; + + struct mlx5e_cq cq; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + struct mlx5e_sq_stats *stats; + struct { + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_skb_fifo skb_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } db; + void __iomem *uar_map; + struct netdev_queue *txq; + u32 sqn; + u16 stop_room; + u8 max_sq_mpw_wqebbs; + u8 min_inline_mode; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + unsigned int hw_mtu; + struct mlx5_clock *clock; + struct net_device *netdev; + struct mlx5_core_dev *mdev; + struct mlx5e_channel *channel; + struct mlx5e_priv *priv; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + int ch_ix; + int txq_ix; + u32 rate_limit; + struct work_struct recover_work; + struct mlx5e_ptpsq *ptpsq; + cqe_ts_to_ns ptp_cyc2time; +} ____cacheline_aligned_in_smp; + +struct mlx5e_xdp_info_fifo { + union mlx5e_xdp_info *xi; + u32 *cc; + u32 *pc; + u32 mask; +}; + +struct mlx5e_xdpsq; +struct mlx5e_xmit_data; +typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, + struct mlx5e_xmit_data *, + int); + +struct mlx5e_xdpsq { + /* data path */ + + /* dirtied @completion */ + u32 xdpi_fifo_cc; + u16 cc; + + /* dirtied @xmit */ + u32 xdpi_fifo_pc ____cacheline_aligned_in_smp; + u16 pc; + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5e_tx_mpwqe mpwqe; + + struct mlx5e_cq cq; + + /* read only */ + struct xsk_buff_pool *xsk_pool; + struct mlx5_wq_cyc wq; + struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; + mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; + struct { + struct mlx5e_xdp_wqe_info *wqe_info; + struct mlx5e_xdp_info_fifo xdpi_fifo; + } db; + void __iomem *uar_map; + u32 sqn; + struct device *pdev; + __be32 mkey_be; + u16 stop_room; + u8 max_sq_mpw_wqebbs; + u8 min_inline_mode; + unsigned long state; + unsigned int hw_mtu; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +struct mlx5e_ktls_resync_resp; + +struct mlx5e_icosq { + /* data path */ + u16 cc; + u16 pc; + + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_icosq_wqe_info *wqe_info; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + u16 reserved_room; + unsigned long state; + struct mlx5e_ktls_resync_resp *ktls_resync; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; + + struct work_struct recover_work; +} ____cacheline_aligned_in_smp; + +struct mlx5e_frag_page { + struct page *page; + u16 frags; +}; + +enum mlx5e_wqe_frag_flag { + MLX5E_WQE_FRAG_LAST_IN_PAGE, + MLX5E_WQE_FRAG_SKIP_RELEASE, +}; + +struct mlx5e_wqe_frag_info { + union { + struct mlx5e_frag_page *frag_page; + struct xdp_buff **xskp; + }; + u32 offset; + u8 flags; +}; + +union mlx5e_alloc_units { + DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages); + DECLARE_FLEX_ARRAY(struct page *, pages); + DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs); +}; + +struct mlx5e_mpw_info { + u16 consumed_strides; + DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); + struct mlx5e_frag_page linear_page; + union mlx5e_alloc_units alloc_units; +}; + +#define MLX5E_MAX_RX_FRAGS 4 + +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); +typedef struct sk_buff * +(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, + u32 head_offset, u32 page_idx); +typedef struct sk_buff * +(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + struct mlx5_cqe64 *cqe, u32 cqe_bcnt); +typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); +typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool); + +int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk); +void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params); + +enum mlx5e_rq_flag { + MLX5E_RQ_FLAG_XDP_XMIT, + MLX5E_RQ_FLAG_XDP_REDIRECT, +}; + +struct mlx5e_rq_frag_info { + int frag_size; + int frag_stride; +}; + +struct mlx5e_rq_frags_info { + struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS]; + u8 num_frags; + u8 log_num_frags; + u16 wqe_bulk; + u16 refill_unit; + u8 wqe_index_mask; +}; + +struct mlx5e_dma_info { + dma_addr_t addr; + union { + struct mlx5e_frag_page *frag_page; + struct page *page; + }; +}; + +struct mlx5e_shampo_hd { + u32 mkey; + struct mlx5e_dma_info *info; + struct mlx5e_frag_page *pages; + u16 curr_page_index; + u16 hd_per_wq; + u16 hd_per_wqe; + unsigned long *bitmap; + u16 pi; + u16 ci; + __be32 key; + u64 last_addr; +}; + +struct mlx5e_hw_gro_data { + struct sk_buff *skb; + struct flow_keys fk; + int second_ip_id; +}; + +enum mlx5e_mpwrq_umr_mode { + MLX5E_MPWRQ_UMR_MODE_ALIGNED, + MLX5E_MPWRQ_UMR_MODE_UNALIGNED, + MLX5E_MPWRQ_UMR_MODE_OVERSIZED, + MLX5E_MPWRQ_UMR_MODE_TRIPLE, +}; + +struct mlx5e_rq { + /* data path */ + union { + struct { + struct mlx5_wq_cyc wq; + struct mlx5e_wqe_frag_info *frags; + union mlx5e_alloc_units *alloc_units; + struct mlx5e_rq_frags_info info; + mlx5e_fp_skb_from_cqe skb_from_cqe; + } wqe; + struct { + struct mlx5_wq_ll wq; + struct mlx5e_umr_wqe umr_wqe; + struct mlx5e_mpw_info *info; + mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq; + __be32 umr_mkey_be; + u16 num_strides; + u16 actual_wq_head; + u8 log_stride_sz; + u8 umr_in_progress; + u8 umr_last_bulk; + u8 umr_completed; + u8 min_wqe_bulk; + u8 page_shift; + u8 pages_per_wqe; + u8 umr_wqebbs; + u8 mtts_per_wqe; + u8 umr_mode; + struct mlx5e_shampo_hd *shampo; + } mpwqe; + }; + struct { + u16 headroom; + u32 frame0_sz; + u8 map_dir; /* dma map direction */ + } buff; + + struct device *pdev; + struct net_device *netdev; + struct mlx5e_rq_stats *stats; + struct mlx5e_cq cq; + struct mlx5e_cq_decomp cqd; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; + struct mlx5e_icosq *icosq; + struct mlx5e_priv *priv; + + struct mlx5e_hw_gro_data *hw_gro_data; + + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_post_rx_wqes post_wqes; + mlx5e_fp_dealloc_wqe dealloc_wqe; + + unsigned long state; + int ix; + unsigned int hw_mtu; + + struct dim dim; /* Dynamic Interrupt Moderation */ + + /* XDP */ + struct bpf_prog __rcu *xdp_prog; + struct mlx5e_xdpsq *xdpsq; + DECLARE_BITMAP(flags, 8); + struct page_pool *page_pool; + + /* AF_XDP zero-copy */ + struct xsk_buff_pool *xsk_pool; + + struct work_struct recover_work; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; + __be32 mkey_be; + u8 wq_type; + u32 rqn; + struct mlx5_core_dev *mdev; + struct mlx5e_channel *channel; + struct mlx5e_dma_info wqe_overflow; + + /* XDP read-mostly */ + struct xdp_rxq_info xdp_rxq; + cqe_ts_to_ns ptp_cyc2time; +} ____cacheline_aligned_in_smp; + +enum mlx5e_channel_state { + MLX5E_CHANNEL_STATE_XSK, + MLX5E_CHANNEL_NUM_STATES +}; + +struct mlx5e_channel { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_xdpsq rq_xdpsq; + struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_icosq icosq; /* internal control operations */ + struct mlx5e_txqsq __rcu * __rcu *qos_sqs; + bool xdp; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + u16 qos_sqs_size; + u8 num_tc; + u8 lag_port; + + /* XDP_REDIRECT */ + struct mlx5e_xdpsq xdpsq; + + /* AF_XDP zero-copy */ + struct mlx5e_rq xskrq; + struct mlx5e_xdpsq xsksq; + + /* Async ICOSQ */ + struct mlx5e_icosq async_icosq; + /* async_icosq can be accessed from any CPU - the spinlock protects it. */ + spinlock_t async_icosq_lock; + + /* data path - accessed per napi poll */ + const struct cpumask *aff_mask; + struct mlx5e_ch_stats *stats; + + /* control */ + struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); + int ix; + int cpu; + /* Sync between icosq recovery and XSK enable/disable. */ + struct mutex icosq_recovery_lock; +}; + +struct mlx5e_ptp; + +struct mlx5e_channels { + struct mlx5e_channel **c; + struct mlx5e_ptp *ptp; + unsigned int num; + struct mlx5e_params params; +}; + +struct mlx5e_channel_stats { + struct mlx5e_ch_stats ch; + struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; + struct mlx5e_rq_stats xskrq; + struct mlx5e_xdpsq_stats rq_xdpsq; + struct mlx5e_xdpsq_stats xdpsq; + struct mlx5e_xdpsq_stats xsksq; +} ____cacheline_aligned_in_smp; + +struct mlx5e_ptp_stats { + struct mlx5e_ch_stats ch; + struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; +} ____cacheline_aligned_in_smp; + +enum { + MLX5E_STATE_OPENED, + MLX5E_STATE_DESTROYING, + MLX5E_STATE_XDP_TX_ENABLED, + MLX5E_STATE_XDP_ACTIVE, + MLX5E_STATE_CHANNELS_ACTIVE, +}; + +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + int rl_update; + int rl_index; + bool qos_update; + u16 qos_queue_group_id; +}; + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) +struct mlx5e_hv_vhca_stats_agent { + struct mlx5_hv_vhca_agent *agent; + struct delayed_work work; + u16 delay; + void *buf; +}; +#endif + +struct mlx5e_xsk { + /* XSK buffer pools are stored separately from channels, + * because we don't want to lose them when channels are + * recreated. The kernel also stores buffer pool, but it doesn't + * distinguish between zero-copy and non-zero-copy UMEMs, so + * rely on our mechanism. + */ + struct xsk_buff_pool **pools; + u16 refcnt; + bool ever_used; +}; + +/* Temporary storage for variables that are allocated when struct mlx5e_priv is + * initialized, and used where we can't allocate them because that functions + * must not fail. Use with care and make sure the same variable is not used + * simultaneously by multiple users. + */ +struct mlx5e_scratchpad { + cpumask_var_t cpumask; +}; + +struct mlx5e_trap; +struct mlx5e_htb; + +struct mlx5e_priv { + /* priv data path fields - start */ + struct mlx5e_selq selq; + struct mlx5e_txqsq **txq2sq; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct mlx5e_dcbx_dp dcbx_dp; +#endif + /* priv data path fields - end */ + + unsigned long state; + struct mutex state_lock; /* Protects Interface state */ + struct mlx5e_rq drop_rq; + + struct mlx5e_channels channels; + u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC]; + struct mlx5e_rx_res *rx_res; + u32 *tx_rates; + + struct mlx5e_flow_steering *fs; + + struct workqueue_struct *wq; + struct work_struct update_carrier_work; + struct work_struct set_rx_mode_work; + struct work_struct tx_timeout_work; + struct work_struct update_stats_work; + struct work_struct monitor_counters_work; + struct mlx5_nb monitor_counters_nb; + + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_trap *en_trap; + struct mlx5e_stats stats; + struct mlx5e_channel_stats **channel_stats; + struct mlx5e_channel_stats trap_stats; + struct mlx5e_ptp_stats ptp_stats; + struct mlx5e_sq_stats **htb_qos_sq_stats; + u16 htb_max_qos_sqs; + u16 stats_nch; + u16 max_nch; + u8 max_opened_tc; + bool tx_ptp_opened; + bool rx_ptp_opened; + struct hwtstamp_config tstamp; + u16 q_counter; + u16 drop_rq_q_counter; + struct notifier_block events_nb; + struct notifier_block blocking_events_nb; + + struct udp_tunnel_nic_info nic_info; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct mlx5e_dcbx dcbx; +#endif + + const struct mlx5e_profile *profile; + void *ppriv; +#ifdef CONFIG_MLX5_MACSEC + struct mlx5e_macsec *macsec; +#endif +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_ipsec *ipsec; +#endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_tls *tls; +#endif + struct devlink_health_reporter *tx_reporter; + struct devlink_health_reporter *rx_reporter; + struct mlx5e_xsk xsk; +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + struct mlx5e_hv_vhca_stats_agent stats_agent; +#endif + struct mlx5e_scratchpad scratchpad; + struct mlx5e_htb *htb; + struct mlx5e_mqprio_rl *mqprio_rl; + struct dentry *dfs_root; +}; + +struct mlx5e_dev { + struct mlx5e_priv *priv; + struct devlink_port dl_port; +}; + +struct mlx5e_rx_handlers { + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo; +}; + +extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic; + +enum mlx5e_profile_feature { + MLX5E_PROFILE_FEATURE_PTP_RX, + MLX5E_PROFILE_FEATURE_PTP_TX, + MLX5E_PROFILE_FEATURE_QOS_HTB, + MLX5E_PROFILE_FEATURE_FS_VLAN, + MLX5E_PROFILE_FEATURE_FS_TC, +}; + +struct mlx5e_profile { + int (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + int (*update_rx)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + void (*update_carrier)(struct mlx5e_priv *priv); + int (*max_nch_limit)(struct mlx5_core_dev *mdev); + unsigned int (*stats_grps_num)(struct mlx5e_priv *priv); + mlx5e_stats_grp_t *stats_grps; + const struct mlx5e_rx_handlers *rx_handlers; + int max_tc; + u32 features; +}; + +#define mlx5e_profile_feature_cap(profile, feature) \ + ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature)) + +void mlx5e_build_ptys2ethtool_map(void); + +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); + +void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close); +void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); + +int mlx5e_self_test_num(struct mlx5e_priv *priv); +int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data); +void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, + u64 *buf); +void mlx5e_set_rx_mode_work(struct work_struct *work); + +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter); + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +void mlx5e_timestamp_init(struct mlx5e_priv *priv); + +struct mlx5e_xsk_param; + +struct mlx5e_rq_param; +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq); +#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); +void mlx5e_close_rq(struct mlx5e_rq *rq); +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); +void mlx5e_destroy_rq(struct mlx5e_rq *rq); + +struct mlx5e_sq_param; +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, + struct mlx5e_xdpsq *sq, bool is_redirect); +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); + +struct mlx5e_create_cq_param { + struct napi_struct *napi; + struct mlx5e_ch_stats *ch_stats; + int node; + int ix; +}; + +struct mlx5e_cq_param; +int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp, + struct mlx5e_cq *cq); +void mlx5e_close_cq(struct mlx5e_cq *cq); + +int mlx5e_open_locked(struct net_device *netdev); +int mlx5e_close_locked(struct net_device *netdev); + +void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c); +void mlx5e_trigger_napi_sched(struct napi_struct *napi); + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs); +void mlx5e_close_channels(struct mlx5e_channels *chs); + +/* Function pointer to be used to modify HW or kernel settings while + * switching channels + */ +typedef int (*mlx5e_fp_preactivate)(struct mlx5e_priv *priv, void *context); +#define MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(fn) \ +int fn##_ctx(struct mlx5e_priv *priv, void *context) \ +{ \ + return fn(priv); \ +} +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); +int mlx5e_safe_switch_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params, + mlx5e_fp_preactivate preactivate, + void *context, bool reset); +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); +int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state); +void mlx5e_activate_rq(struct mlx5e_rq *rq); +void mlx5e_deactivate_rq(struct mlx5e_rq *rq); +void mlx5e_activate_icosq(struct mlx5e_icosq *icosq); +void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); + +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p); +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, + struct mlx5e_sq_stats *sq_stats); +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_free_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_tx_disable_queue(struct netdev_queue *txq); +int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa); +void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq); +struct mlx5e_create_sq_param; +int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, + u32 *sqn); +void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq); + +static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_ETH(mdev, swp) && + MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso); +} + +extern const struct ethtool_ops mlx5e_ethtool_ops; + +int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, + bool enable_mc_lb); +void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc); + +/* common netdev helpers */ +void mlx5e_create_q_counters(struct mlx5e_priv *priv); +void mlx5e_destroy_q_counters(struct mlx5e_priv *priv); +int mlx5e_open_drop_rq(struct mlx5e_priv *priv, + struct mlx5e_rq *drop_rq); +void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); + +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_destroy_tises(struct mlx5e_priv *priv); +int mlx5e_update_nic_rx(struct mlx5e_priv *priv); +void mlx5e_update_carrier(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); + +void mlx5e_queue_update_stats(struct mlx5e_priv *priv); + +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); +int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context); +int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, + mlx5e_fp_preactivate preactivate); +void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv); + +/* ethtool helpers */ +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo); +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, + uint32_t stringset, uint8_t *data); +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset); +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data); +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param); +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param); +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch); +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal); +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack); +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings); +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings); +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, + const u8 hfunc); +u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); +u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); + +/* mlx5e generic netdev management API */ +static inline bool +mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev) +{ + return !is_kdump_kernel() && + MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe); +} + +int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev); +int mlx5e_priv_init(struct mlx5e_priv *priv, + const struct mlx5e_profile *profile, + struct net_device *netdev, + struct mlx5_core_dev *mdev); +void mlx5e_priv_cleanup(struct mlx5e_priv *priv); +struct net_device * +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile); +int mlx5e_attach_netdev(struct mlx5e_priv *priv); +void mlx5e_detach_netdev(struct mlx5e_priv *priv); +void mlx5e_destroy_netdev(struct mlx5e_priv *priv); +int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, + const struct mlx5e_profile *new_profile, void *new_ppriv); +void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); +void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); +void mlx5e_rx_dim_work(struct work_struct *work); +void mlx5e_tx_dim_work(struct work_struct *work); + +void mlx5e_set_xdp_feature(struct net_device *netdev); +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features); +int mlx5e_set_features(struct net_device *netdev, netdev_features_t features); +#ifdef CONFIG_MLX5_ESWITCH +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac); +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); +int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); +int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats); +#endif +int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey); +#endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c new file mode 100644 index 0000000000..48581ea3ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "channels.h" +#include "en.h" +#include "en/ptp.h" + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs) +{ + return chs->num; +} + +static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix) +{ + WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs)); + return chs->c[ix]; +} + +bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix) +{ + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); + + return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); +} + +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); + + *rqn = c->rq.rqn; +} + +void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); + + WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)); + + *rqn = c->xskrq.rqn; +} + +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn) +{ + struct mlx5e_ptp *c = chs->ptp; + + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return false; + + *rqn = c->rq.rqn; + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h new file mode 100644 index 0000000000..637ca90daa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_CHANNELS_H__ +#define __MLX5_EN_CHANNELS_H__ + +#include + +struct mlx5e_channels; + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs); +bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix); +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn); + +#endif /* __MLX5_EN_CHANNELS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h new file mode 100644 index 0000000000..b59aee75de --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __MLX5E_DCBNL_H__ +#define __MLX5E_DCBNL_H__ + +#ifdef CONFIG_MLX5_CORE_EN_DCB + +#define MLX5E_MAX_PRIORITY (8) + +struct mlx5e_cee_config { + /* bw pct for priority group */ + u8 pg_bw_pct[CEE_DCBX_MAX_PGS]; + u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO]; + bool pfc_setting[CEE_DCBX_MAX_PRIO]; + bool pfc_enable; +}; + +struct mlx5e_dcbx { + enum mlx5_dcbx_oper_mode mode; + struct mlx5e_cee_config cee_cfg; /* pending configuration */ + u8 dscp_app_cnt; + + /* The only setting that cannot be read from FW */ + u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; + u8 cap; + + /* Buffer configuration */ + bool manual_buffer; + u32 cable_len; + u32 xoff; + u16 port_buff_cell_sz; +}; + +#define MLX5E_MAX_DSCP (64) + +struct mlx5e_dcbx_dp { + u8 dscp2prio[MLX5E_MAX_DSCP]; + u8 trust_state; +}; + +void mlx5e_dcbnl_build_netdev(struct net_device *netdev); +void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); +#else +static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {} +static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {} +static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {} +static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {} +#endif + +#endif /* __MLX5E_DCBNL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c new file mode 100644 index 0000000000..c6b6e290fd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en/devlink.h" +#include "eswitch.h" + +static const struct devlink_ops mlx5e_devlink_ops = { +}; + +struct mlx5e_dev *mlx5e_create_devlink(struct device *dev, + struct mlx5_core_dev *mdev) +{ + struct mlx5e_dev *mlx5e_dev; + struct devlink *devlink; + + devlink = devlink_alloc_ns(&mlx5e_devlink_ops, sizeof(*mlx5e_dev), + devlink_net(priv_to_devlink(mdev)), dev); + if (!devlink) + return ERR_PTR(-ENOMEM); + devlink_register(devlink); + return devlink_priv(devlink); +} + +void mlx5e_destroy_devlink(struct mlx5e_dev *mlx5e_dev) +{ + struct devlink *devlink = priv_to_devlink(mlx5e_dev); + + devlink_unregister(devlink); + devlink_free(devlink); +} + +static void +mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) +{ + u64 parent_id; + + parent_id = mlx5_query_nic_system_image_guid(dev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); +} + +int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, + struct mlx5_core_dev *mdev) +{ + struct devlink *devlink = priv_to_devlink(mlx5e_dev); + struct devlink_port_attrs attrs = {}; + struct netdev_phys_item_id ppid = {}; + unsigned int dl_port_index; + + if (mlx5_core_is_pf(mdev)) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = mlx5_get_dev_index(mdev); + if (MLX5_ESWITCH_MANAGER(mdev)) { + mlx5e_devlink_get_port_parent_id(mdev, &ppid); + memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); + attrs.switch_id.id_len = ppid.id_len; + } + dl_port_index = mlx5_esw_vport_to_devlink_port_index(mdev, + MLX5_VPORT_UPLINK); + } else { + attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; + dl_port_index = mlx5_esw_vport_to_devlink_port_index(mdev, 0); + } + + devlink_port_attrs_set(&mlx5e_dev->dl_port, &attrs); + + return devlink_port_register(devlink, &mlx5e_dev->dl_port, + dl_port_index); +} + +void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev) +{ + devlink_port_unregister(&mlx5e_dev->dl_port); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h new file mode 100644 index 0000000000..d5ec4461f3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_EN_DEVLINK_H +#define __MLX5E_EN_DEVLINK_H + +#include +#include "en.h" + +struct mlx5e_dev *mlx5e_create_devlink(struct device *dev, + struct mlx5_core_dev *mdev); +void mlx5e_destroy_devlink(struct mlx5e_dev *mlx5e_dev); +int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, + struct mlx5_core_dev *mdev); +void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h new file mode 100644 index 0000000000..e5a44b0b96 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5E_FLOW_STEER_H__ +#define __MLX5E_FLOW_STEER_H__ + +#include "mod_hdr.h" +#include "lib/fs_ttc.h" + +struct mlx5e_post_act; +struct mlx5e_tc_table; + +enum { + MLX5E_TC_FT_LEVEL = 0, + MLX5E_TC_TTC_FT_LEVEL, + MLX5E_TC_MISS_LEVEL, +}; + +enum { + MLX5E_TC_PRIO = 0, + MLX5E_NIC_PRIO +}; + +struct mlx5e_flow_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; +}; + +struct mlx5e_l2_rule { + u8 addr[ETH_ALEN + 2]; + struct mlx5_flow_handle *rule; +}; + +#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE) + +struct mlx5e_promisc_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_handle *rule; +}; + +/* Forward declaration and APIs to get private fields of vlan_table */ +struct mlx5e_vlan_table; +unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan); +struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan); + +struct mlx5e_l2_table { + struct mlx5e_flow_table ft; + struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE]; + struct mlx5e_l2_rule broadcast; + struct mlx5e_l2_rule allmulti; + struct mlx5_flow_handle *trap_rule; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; +}; + +#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1) + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + +/* NIC prio FTS */ +enum { + MLX5E_PROMISC_FT_LEVEL, + MLX5E_VLAN_FT_LEVEL, + MLX5E_L2_FT_LEVEL, + MLX5E_TTC_FT_LEVEL, + MLX5E_INNER_TTC_FT_LEVEL, + MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, + MLX5E_FS_TT_ANY_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, +#ifdef CONFIG_MLX5_EN_TLS + MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, +#endif +#ifdef CONFIG_MLX5_EN_ARFS + MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, +#endif +#ifdef CONFIG_MLX5_EN_IPSEC + MLX5E_ACCEL_FS_POL_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, + MLX5E_ACCEL_FS_ESP_FT_LEVEL, + MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, + MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, +#endif +}; + +struct mlx5e_flow_steering; +struct mlx5e_rx_res; + +#ifdef CONFIG_MLX5_EN_ARFS +struct mlx5e_arfs_tables; + +int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple); +void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple); +int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs); +int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs); +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id); +#else +static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple) +{ return 0; } +static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {} +static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) +{ return -EOPNOTSUPP; } +static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) +{ return -EOPNOTSUPP; } +#endif + +#ifdef CONFIG_MLX5_EN_TLS +struct mlx5e_accel_fs_tcp; +#endif + +struct mlx5e_profile; +struct mlx5e_fs_udp; +struct mlx5e_fs_any; +struct mlx5e_ptp_fs; + +void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + struct ttc_params *ttc_params, bool tunnel); + +void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs); +int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res); + +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); + +void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc); +void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc); + +int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + const struct mlx5e_profile *profile, + struct net_device *netdev); +void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple, + const struct mlx5e_profile *profile); + +struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, + struct mlx5_core_dev *mdev, + bool state_destroy, + struct dentry *dfs_root); +void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs); +struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc); +struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs); +struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs); +struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress); +void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress); +#ifdef CONFIG_MLX5_EN_RXNFC +struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs); +#endif +struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner); +void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner); +#ifdef CONFIG_MLX5_EN_ARFS +struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs); +#endif +struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs); +struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any); +struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp); +#ifdef CONFIG_MLX5_EN_TLS +struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp); +#endif +void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy); +void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable); + +struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs); +int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num); +void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs); +int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num); +void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs); +void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev); +int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid); +int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid); +void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev); + +struct dentry *mlx5e_fs_get_debugfs_root(struct mlx5e_flow_steering *fs); + +#define fs_err(fs, fmt, ...) \ + mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_dbg(fs, fmt, ...) \ + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_warn(fs, fmt, ...) \ + mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#define fs_warn_once(fs, fmt, ...) \ + mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) + +#endif /* __MLX5E_FLOW_STEER_H__ */ + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h new file mode 100644 index 0000000000..9e276fd3c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5E_FS_ETHTOOL_H__ +#define __MLX5E_FS_ETHTOOL_H__ + +struct mlx5e_priv; +struct mlx5e_ethtool_steering; +#ifdef CONFIG_MLX5_EN_RXNFC +int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool); +void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool); +void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs); +void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs); +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd); +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs); +#else +static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool) +{ return 0; } +static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { } +static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { } +static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { } +static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) +{ return -EOPNOTSUPP; } +static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ return -EOPNOTSUPP; } +#endif +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c new file mode 100644 index 0000000000..671adbad0a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -0,0 +1,615 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "en/fs_tt_redirect.h" +#include "fs_core.h" +#include "mlx5_core.h" + +enum fs_udp_type { + FS_IPV4_UDP, + FS_IPV6_UDP, + FS_UDP_NUM_TYPES, +}; + +struct mlx5e_fs_udp { + struct mlx5e_flow_table tables[FS_UDP_NUM_TYPES]; + struct mlx5_flow_handle *default_rules[FS_UDP_NUM_TYPES]; + int ref_cnt; +}; + +struct mlx5e_fs_any { + struct mlx5e_flow_table table; + struct mlx5_flow_handle *default_rule; + int ref_cnt; +}; + +static char *fs_udp_type2str(enum fs_udp_type i) +{ + switch (i) { + case FS_IPV4_UDP: + return "UDP v4"; + default: /* FS_IPV6_UDP */ + return "UDP v6"; + } +} + +static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i) +{ + switch (i) { + case FS_IPV4_UDP: + return MLX5_TT_IPV4_UDP; + default: /* FS_IPV6_UDP */ + return MLX5_TT_IPV6_UDP; + } +} + +static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i) +{ + switch (i) { + case MLX5_TT_IPV4_UDP: + return FS_IPV4_UDP; + case MLX5_TT_IPV6_UDP: + return FS_IPV6_UDP; + default: + return FS_UDP_NUM_TYPES; + } +} + +void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type type, + u16 udp_dport) +{ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, + type == FS_IPV4_UDP ? 4 : 6); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, udp_dport); +} + +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs, + enum mlx5_traffic_types ttc_type, + u32 tir_num, u16 d_port) +{ + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); + enum fs_udp_type type = tt2fs_udp(ttc_type); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + if (type == FS_UDP_NUM_TYPES) + return ERR_PTR(-EINVAL); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ft = fs_udp->tables[type].t; + + fs_udp_set_dport_flow(spec, type, d_port); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add %s rule failed, err %d\n", + __func__, fs_udp_type2str(type), err); + } + return rule; +} + +static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); + struct mlx5e_flow_table *fs_udp_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err; + + fs_udp_t = &fs_udp->tables[type]; + + dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type)); + rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n", + __func__, type, err); + return err; + } + + fs_udp->default_rules[type] = rule; + return 0; +} + +#define MLX5E_FS_UDP_NUM_GROUPS (2) +#define MLX5E_FS_UDP_GROUP1_SIZE (BIT(16)) +#define MLX5E_FS_UDP_GROUP2_SIZE (BIT(0)) +#define MLX5E_FS_UDP_TABLE_SIZE (MLX5E_FS_UDP_GROUP1_SIZE +\ + MLX5E_FS_UDP_GROUP2_SIZE) +static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + ft->g = NULL; + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version); + + switch (type) { + case FS_IPV4_UDP: + case FS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + break; + default: + err = -EINVAL; + goto out; + } + /* Match on udp protocol, Ipv4/6 and dport */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_UDP_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_UDP_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft; + int err; + + ft = &fs_udp->tables[type]; + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; + ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n", + fs_udp_type2str(type), ft->t->id, ft->t->level); + + err = fs_udp_create_groups(ft, type); + if (err) + goto err; + + err = fs_udp_add_default_rule(fs, type); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i) +{ + if (IS_ERR_OR_NULL(fs_udp->tables[i].t)) + return; + + mlx5_del_flow_rules(fs_udp->default_rules[i]); + mlx5e_destroy_flow_table(&fs_udp->tables[i]); + fs_udp->tables[i].t = NULL; +} + +static int fs_udp_disable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + int err, i; + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i)); + if (err) { + fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, fs_udp2tt(i), err); + return err; + } + } + + return 0; +} + +static int fs_udp_enable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs); + struct mlx5_flow_destination dest = {}; + int err, i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + dest.ft = udp->tables[i].t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest); + if (err) { + fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, fs_udp2tt(i), err); + return err; + } + } + return 0; +} + +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs); + int i; + + if (!fs_udp) + return; + + if (--fs_udp->ref_cnt) + return; + + fs_udp_disable(fs); + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) + fs_udp_destroy_table(fs_udp, i); + + kfree(fs_udp); + mlx5e_fs_set_udp(fs, NULL); +} + +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs); + int i, err; + + if (udp) { + udp->ref_cnt++; + return 0; + } + + udp = kzalloc(sizeof(*udp), GFP_KERNEL); + if (!udp) + return -ENOMEM; + mlx5e_fs_set_udp(fs, udp); + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + err = fs_udp_create_table(fs, i); + if (err) + goto err_destroy_tables; + } + + err = fs_udp_enable(fs); + if (err) + goto err_destroy_tables; + + udp->ref_cnt = 1; + + return 0; + +err_destroy_tables: + while (--i >= 0) + fs_udp_destroy_table(udp, i); + + kfree(udp); + mlx5e_fs_set_udp(fs, NULL); + return err; +} + +static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_type) +{ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ether_type); +} + +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs, + u32 tir_num, u16 ether_type) +{ + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ft = fs_any->table.t; + + fs_any_set_ethertype_flow(spec, ether_type); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add ANY rule failed, err %d\n", + __func__, err); + } + return rule; +} + +static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + struct mlx5e_flow_table *fs_any_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err; + + fs_any_t = &fs_any->table; + dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY); + rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n", + __func__, err); + return err; + } + + fs_any->default_rule = rule; + return 0; +} + +#define MLX5E_FS_ANY_NUM_GROUPS (2) +#define MLX5E_FS_ANY_GROUP1_SIZE (BIT(16)) +#define MLX5E_FS_ANY_GROUP2_SIZE (BIT(0)) +#define MLX5E_FS_ANY_TABLE_SIZE (MLX5E_FS_ANY_GROUP1_SIZE +\ + MLX5E_FS_ANY_GROUP2_SIZE) + +static int fs_any_create_groups(struct mlx5e_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + ft->g = NULL; + kvfree(in); + return -ENOMEM; + } + + /* Match on ethertype */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_ANY_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_ANY_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int fs_any_create_table(struct mlx5e_flow_steering *fs) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + struct mlx5e_flow_table *ft = &fs_any->table; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; + ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n", + ft->t->id, ft->t->level); + + err = fs_any_create_groups(ft); + if (err) + goto err; + + err = fs_any_add_default_rule(fs); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static int fs_any_disable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + int err; + + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY); + if (err) { + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, MLX5_TT_ANY, err); + return err; + } + return 0; +} + +static int fs_any_enable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs); + struct mlx5_flow_destination dest = {}; + int err; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = any->table.t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest); + if (err) { + fs_err(fs, + "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, MLX5_TT_ANY, err); + return err; + } + return 0; +} + +static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any) +{ + if (IS_ERR_OR_NULL(fs_any->table.t)) + return; + + mlx5_del_flow_rules(fs_any->default_rule); + mlx5e_destroy_flow_table(&fs_any->table); + fs_any->table.t = NULL; +} + +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + + if (!fs_any) + return; + + if (--fs_any->ref_cnt) + return; + + fs_any_disable(fs); + + fs_any_destroy_table(fs_any); + + kfree(fs_any); + mlx5e_fs_set_any(fs, NULL); +} + +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs); + int err; + + if (fs_any) { + fs_any->ref_cnt++; + return 0; + } + + fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL); + if (!fs_any) + return -ENOMEM; + mlx5e_fs_set_any(fs, fs_any); + + err = fs_any_create_table(fs); + if (err) + goto err_free_any; + + err = fs_any_enable(fs); + if (err) + goto err_destroy_table; + + fs_any->ref_cnt = 1; + + return 0; + +err_destroy_table: + fs_any_destroy_table(fs_any); +err_free_any: + mlx5e_fs_set_any(fs, NULL); + kfree(fs_any); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h new file mode 100644 index 0000000000..5780fd7ad5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5E_FS_TT_REDIRECT_H__ +#define __MLX5E_FS_TT_REDIRECT_H__ + +#include "en/fs.h" + +void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule); + +/* UDP traffic type redirect */ +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs, + enum mlx5_traffic_types ttc_type, + u32 tir_num, u16 d_port); +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs); +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs); + +/* ANY traffic type redirect*/ +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs, + u32 tir_num, u16 ether_type); +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs); +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c new file mode 100644 index 0000000000..6f4e6c34b2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Mellanox Technologies. + +#include "health.h" +#include "lib/eq.h" +#include "lib/mlx5.h" + +int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +{ + u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; + u8 hw_status; + void *cqc; + int err; + + err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out); + if (err) + return err; + + cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); + hw_status = MLX5_GET(cqc, cqc, status); + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +{ + u8 cq_log_stride; + u32 cq_sz; + int err; + + cq_sz = mlx5_cqwq_get_size(&cq->wq); + cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ"); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core)); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +void mlx5e_health_create_reporters(struct mlx5e_priv *priv) +{ + mlx5e_reporter_tx_create(priv); + mlx5e_reporter_rx_create(priv); +} + +void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) +{ + mlx5e_reporter_rx_destroy(priv); + mlx5e_reporter_tx_destroy(priv); +} + +void mlx5e_health_channels_update(struct mlx5e_priv *priv) +{ + if (priv->tx_reporter) + devlink_health_reporter_state_update(priv->tx_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + if (priv->rx_reporter) + devlink_health_reporter_state_update(priv->rx_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); +} + +int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn) +{ + struct mlx5e_modify_sq_param msp = {}; + int err; + + msp.curr_state = MLX5_SQC_STATE_ERR; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn); + return err; + } + + return 0; +} + +int mlx5e_health_recover_channels(struct mlx5e_priv *priv) +{ + int err = 0; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + err = mlx5e_safe_reopen_channels(priv); + +out: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); + + return err; +} + +int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq, + struct mlx5e_ch_stats *stats) +{ + u32 eqe_count; + + netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + if (!eqe_count) + return -EIO; + + netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n", + eqe_count, eq->core.eqn); + + stats->eq_rearm++; + return 0; +} + +int mlx5e_health_report(struct mlx5e_priv *priv, + struct devlink_health_reporter *reporter, char *err_str, + struct mlx5e_err_ctx *err_ctx) +{ + netdev_err(priv->netdev, "%s\n", err_str); + + if (!reporter) + return err_ctx->recover(err_ctx->ctx); + + return devlink_health_report(reporter, err_str, err_ctx); +} + +#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 +static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, + const void *value, u32 value_len) + +{ + u32 data_size; + int err = 0; + u32 offset; + + for (offset = 0; offset < value_len; offset += data_size) { + data_size = value_len - offset; + if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) + data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; + err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); + if (err) + break; + } + return err; +} + +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_rsc_dump_cmd *cmd; + struct page *page; + int cmd_err, err; + int end_err; + int size; + + if (IS_ERR_OR_NULL(mdev->rsc_dump)) + return -EOPNOTSUPP; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); + if (err) + goto free_page; + + cmd = mlx5_rsc_dump_cmd_create(mdev, key); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto free_page; + } + + do { + cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); + if (cmd_err < 0) { + err = cmd_err; + goto destroy_cmd; + } + + err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size); + if (err) + goto destroy_cmd; + + } while (cmd_err > 0); + +destroy_cmd: + mlx5_rsc_dump_cmd_destroy(cmd); + end_err = devlink_fmsg_binary_pair_nest_end(fmsg); + if (end_err) + err = end_err; +free_page: + __free_page(page); + return err; +} + +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl) +{ + struct mlx5_rsc_key key = {}; + int err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = queue_idx; + key.size = PAGE_SIZE; + key.num_of_obj1 = 1; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx); + if (err) + return err; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h new file mode 100644 index 0000000000..415840c3ef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5E_EN_HEALTH_H +#define __MLX5E_EN_HEALTH_H + +#include "en.h" +#include "diag/rsc_dump.h" + +static inline bool cqe_syndrome_needs_recover(u8 syndrome) +{ + return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || + syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR || + syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR; +} + +void mlx5e_reporter_tx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv); +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq); +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq); +void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq); + +int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg); +int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); +int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg); + +void mlx5e_reporter_rx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); + +#define MLX5E_REPORTER_PER_Q_MAX_LEN 256 + +struct mlx5e_err_ctx { + int (*recover)(void *ctx); + int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx); + void *ctx; +}; + +int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn); +int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq, + struct mlx5e_ch_stats *stats); +int mlx5e_health_recover_channels(struct mlx5e_priv *priv); +int mlx5e_health_report(struct mlx5e_priv *priv, + struct devlink_health_reporter *reporter, char *err_str, + struct mlx5e_err_ctx *err_ctx); +void mlx5e_health_create_reporters(struct mlx5e_priv *priv); +void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv); +void mlx5e_health_channels_update(struct mlx5e_priv *priv); +int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, + struct devlink_fmsg *fmsg); +int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + int queue_idx, char *lbl); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c new file mode 100644 index 0000000000..09d441ecb9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include "htb.h" +#include "en.h" +#include "../qos.h" + +struct mlx5e_qos_node { + struct hlist_node hnode; + struct mlx5e_qos_node *parent; + u64 rate; + u32 bw_share; + u32 max_average_bw; + u32 hw_id; + u32 classid; /* 16-bit, except root. */ + u16 qid; +}; + +struct mlx5e_htb { + DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES)); + DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES); + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_priv *priv; + struct mlx5e_selq *selq; +}; + +#define MLX5E_QOS_QID_INNER 0xffff +#define MLX5E_HTB_CLASSID_ROOT 0xffffffff + +/* Software representation of the QoS tree */ + +int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data) +{ + struct mlx5e_qos_node *node = NULL; + int bkt, err; + + hash_for_each(htb->qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + err = callback(data, node->qid, node->hw_id); + if (err) + return err; + } + return 0; +} + +int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb) +{ + int last; + + last = find_last_bit(htb->qos_used_qids, mlx5e_qos_max_leaf_nodes(htb->mdev)); + return last == mlx5e_qos_max_leaf_nodes(htb->mdev) ? 0 : last + 1; +} + +static int mlx5e_htb_find_unused_qos_qid(struct mlx5e_htb *htb) +{ + int size = mlx5e_qos_max_leaf_nodes(htb->mdev); + struct mlx5e_priv *priv = htb->priv; + int res; + + WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__); + res = find_first_zero_bit(htb->qos_used_qids, size); + + return res == size ? -ENOSPC : res; +} + +static struct mlx5e_qos_node * +mlx5e_htb_node_create_leaf(struct mlx5e_htb *htb, u16 classid, u16 qid, + struct mlx5e_qos_node *parent) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + + node->qid = qid; + __set_bit(qid, htb->qos_used_qids); + + node->classid = classid; + hash_add_rcu(htb->qos_tc2node, &node->hnode, classid); + + mlx5e_update_tx_netdev_queues(htb->priv); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_create_root(struct mlx5e_htb *htb) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->qid = MLX5E_QOS_QID_INNER; + node->classid = MLX5E_HTB_CLASSID_ROOT; + hash_add_rcu(htb->qos_tc2node, &node->hnode, node->classid); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find(struct mlx5e_htb *htb, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible(htb->qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find_rcu(struct mlx5e_htb *htb, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible_rcu(htb->qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static void mlx5e_htb_node_delete(struct mlx5e_htb *htb, struct mlx5e_qos_node *node) +{ + hash_del_rcu(&node->hnode); + if (node->qid != MLX5E_QOS_QID_INNER) { + __clear_bit(node->qid, htb->qos_used_qids); + mlx5e_update_tx_netdev_queues(htb->priv); + } + /* Make sure this qid is no longer selected by mlx5e_select_queue, so + * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue. + */ + synchronize_net(); + kfree(node); +} + +/* TX datapath API */ + +int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid) +{ + struct mlx5e_qos_node *node; + u16 qid; + int res; + + rcu_read_lock(); + + node = mlx5e_htb_node_find_rcu(htb, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == MLX5E_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = mlx5e_qid_from_qos(&htb->priv->channels, qid); + +out: + rcu_read_unlock(); + return res; +} + +/* HTB TC handlers */ + +static int +mlx5e_htb_root_add(struct mlx5e_htb *htb, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *root; + bool opened; + int err; + + qos_dbg(htb->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls); + + mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls); + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + err = mlx5e_qos_alloc_queues(priv, &priv->channels); + if (err) + goto err_cancel_selq; + } + + root = mlx5e_htb_node_create_root(htb); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto err_free_queues; + } + + err = mlx5_qos_create_root_node(htb->mdev, &root->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware."); + goto err_sw_node_delete; + } + + mlx5e_selq_apply(htb->selq); + + return 0; + +err_sw_node_delete: + mlx5e_htb_node_delete(htb, root); + +err_free_queues: + if (opened) + mlx5e_qos_close_all_queues(&priv->channels); +err_cancel_selq: + mlx5e_selq_cancel(htb->selq); + return err; +} + +static int mlx5e_htb_root_del(struct mlx5e_htb *htb) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *root; + int err; + + qos_dbg(htb->mdev, "TC_HTB_DESTROY\n"); + + /* Wait until real_num_tx_queues is updated for mlx5e_select_queue, + * so that we can safely switch to its non-HTB non-PTP fastpath. + */ + synchronize_net(); + + mlx5e_selq_prepare_htb(htb->selq, 0, 0); + mlx5e_selq_apply(htb->selq); + + root = mlx5e_htb_node_find(htb, MLX5E_HTB_CLASSID_ROOT); + if (!root) { + qos_err(htb->mdev, "Failed to find the root node in the QoS tree\n"); + return -ENOENT; + } + err = mlx5_qos_destroy_node(htb->mdev, root->hw_id); + if (err) + qos_err(htb->mdev, "Failed to destroy root node %u, err = %d\n", + root->hw_id, err); + mlx5e_htb_node_delete(htb, root); + + mlx5e_qos_deactivate_all_queues(&priv->channels); + mlx5e_qos_close_all_queues(&priv->channels); + + return err; +} + +static int mlx5e_htb_convert_rate(struct mlx5e_htb *htb, u64 rate, + struct mlx5e_qos_node *parent, u32 *bw_share) +{ + u64 share = 0; + + while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw) + parent = parent->parent; + + if (parent->max_average_bw) + share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT), + parent->max_average_bw); + else + share = 101; + + *bw_share = share == 0 ? 1 : share > 100 ? 0 : share; + + qos_dbg(htb->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n", + rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share); + + return 0; +} + +static void mlx5e_htb_convert_ceil(struct mlx5e_htb *htb, u64 ceil, u32 *max_average_bw) +{ + /* Hardware treats 0 as "unlimited", set at least 1. */ + *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1); + + qos_dbg(htb->mdev, "Convert: ceil %llu -> max_average_bw %u\n", + ceil, *max_average_bw); +} + +int +mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + struct mlx5e_priv *priv = htb->priv; + int qid; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n", + classid, parent_classid, rate, ceil); + + qid = mlx5e_htb_find_unused_qos_qid(htb); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached."); + return qid; + } + + parent = mlx5e_htb_node_find(htb, parent_classid); + if (!parent) + return -EINVAL; + + node = mlx5e_htb_node_create_leaf(htb, classid, qid, parent); + if (IS_ERR(node)) + return PTR_ERR(node); + + node->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node->parent, &node->bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &node->max_average_bw); + + err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + mlx5e_htb_node_delete(htb, node); + return err; + } + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + return mlx5e_qid_from_qos(&priv->channels, node->qid); +} + +int +mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *child; + struct mlx5e_priv *priv = htb->priv; + int err, tmp_err; + u32 new_hw_id; + u16 qid; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_inner_node(htb->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node."); + qos_err(htb->mdev, "Failed to create an inner node (class %04x), err = %d\n", + classid, err); + return err; + } + + /* Intentionally reuse the qid for the upcoming first child. */ + child = mlx5e_htb_node_create_leaf(htb, child_classid, node->qid, node); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto err_destroy_hw_node; + } + + child->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node, &child->bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &child->max_average_bw); + + err = mlx5_qos_create_leaf_node(htb->mdev, new_hw_id, child->bw_share, + child->max_average_bw, &child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + goto err_delete_sw_node; + } + + /* No fail point. */ + + qid = node->qid; + /* Pairs with mlx5e_htb_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, child->qid, child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, child->qid, child->hw_id); + } + } + + return 0; + +err_delete_sw_node: + child->qid = MLX5E_QOS_QID_INNER; + mlx5e_htb_node_delete(htb, child); + +err_destroy_hw_node: + tmp_err = mlx5_qos_destroy_node(htb->mdev, new_hw_id); + if (tmp_err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n", + new_hw_id, classid, tmp_err); + return err; +} + +static struct mlx5e_qos_node *mlx5e_htb_node_find_by_qid(struct mlx5e_htb *htb, u16 qid) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(htb->qos_tc2node, bkt, node, hnode) + if (node->qid == qid) + break; + + return node; +} + +int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = htb->priv; + struct mlx5e_qos_node *node; + struct netdev_queue *txq; + u16 qid, moved_qid; + bool opened; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid); + + node = mlx5e_htb_node_find(htb, *classid); + if (!node) + return -ENOENT; + + /* Store qid for reuse. */ + qid = node->qid; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + txq = netdev_get_tx_queue(htb->netdev, + mlx5e_qid_from_qos(&priv->channels, qid)); + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, *classid, err); + + mlx5e_htb_node_delete(htb, node); + + moved_qid = mlx5e_htb_cur_leaf_nodes(htb); + + if (moved_qid == 0) { + /* The last QoS SQ was just destroyed. */ + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + moved_qid--; + + if (moved_qid < qid) { + /* The highest QoS SQ was just destroyed. */ + WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u", + qid, moved_qid); + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + + WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid); + qos_dbg(htb->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid); + + node = mlx5e_htb_node_find_by_qid(htb, moved_qid); + WARN(!node, "Could not find a node with qid %u to move to queue %u", + moved_qid, qid); + + /* Stop traffic to the old queue. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + __clear_bit(moved_qid, priv->htb->qos_used_qids); + + if (opened) { + txq = netdev_get_tx_queue(htb->netdev, + mlx5e_qid_from_qos(&priv->channels, moved_qid)); + mlx5e_deactivate_qos_sq(priv, moved_qid); + mlx5e_close_qos_sq(priv, moved_qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(htb->netdev, moved_qid); + + __set_bit(qid, htb->qos_used_qids); + WRITE_ONCE(node->qid, qid); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n", + node->classid, moved_qid, qid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + mlx5e_update_tx_netdev_queues(priv); + if (opened) + mlx5e_reactivate_qos_sq(priv, moved_qid, txq); + + *classid = node->classid; + return 0; +} + +int +mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + struct mlx5e_priv *priv = htb->priv; + u32 old_hw_id, new_hw_id; + int err, saved_err = 0; + u16 qid; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n", + force ? "_FORCE" : "", classid); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->parent->hw_id, + node->parent->bw_share, + node->parent->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + if (!force) + return err; + saved_err = err; + } + + /* Store qid for reuse and prevent clearing the bit. */ + qid = node->qid; + /* Pairs with mlx5e_htb_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(htb->netdev, qid); + + err = mlx5_qos_destroy_node(htb->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + parent = node->parent; + mlx5e_htb_node_delete(htb, node); + + node = parent; + WRITE_ONCE(node->qid, qid); + + /* Early return on error in force mode. Parent will still be an inner + * node to be deleted by a following delete operation. + */ + if (saved_err) + return saved_err; + + old_hw_id = node->hw_id; + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node->qid, node->hw_id); + } + } + + err = mlx5_qos_destroy_node(htb->mdev, old_hw_id); + if (err) /* Not fatal. */ + qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + return 0; +} + +static int +mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *child; + int err = 0; + int bkt; + + hash_for_each(htb->qos_tc2node, bkt, child, hnode) { + u32 old_bw_share = child->bw_share; + int err_one; + + if (child->parent != node) + continue; + + mlx5e_htb_convert_rate(htb, child->rate, node, &child->bw_share); + if (child->bw_share == old_bw_share) + continue; + + err_one = mlx5_qos_update_node(htb->mdev, child->bw_share, + child->max_average_bw, child->hw_id); + if (!err && err_one) { + err = err_one; + + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node."); + qos_err(htb->mdev, "Failed to modify a child node (class %04x), err = %d\n", + node->classid, err); + } + } + + return err; +} + +int +mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + u32 bw_share, max_average_bw; + struct mlx5e_qos_node *node; + bool ceil_changed = false; + int err; + + qos_dbg(htb->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n", + classid, rate, ceil); + + node = mlx5e_htb_node_find(htb, classid); + if (!node) + return -ENOENT; + + node->rate = rate; + mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share); + mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw); + + err = mlx5_qos_update_node(htb->mdev, bw_share, + max_average_bw, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node."); + qos_err(htb->mdev, "Failed to modify a node (class %04x), err = %d\n", + classid, err); + return err; + } + + if (max_average_bw != node->max_average_bw) + ceil_changed = true; + + node->bw_share = bw_share; + node->max_average_bw = max_average_bw; + + if (ceil_changed) + err = mlx5e_htb_update_children(htb, node, extack); + + return err; +} + +struct mlx5e_htb *mlx5e_htb_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_htb), GFP_KERNEL); +} + +void mlx5e_htb_free(struct mlx5e_htb *htb) +{ + kvfree(htb); +} + +int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt, + struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_selq *selq, struct mlx5e_priv *priv) +{ + htb->mdev = mdev; + htb->netdev = netdev; + htb->selq = selq; + htb->priv = priv; + hash_init(htb->qos_tc2node); + return mlx5e_htb_root_add(htb, htb_qopt->parent_classid, htb_qopt->classid, + htb_qopt->extack); +} + +void mlx5e_htb_cleanup(struct mlx5e_htb *htb) +{ + mlx5e_htb_root_del(htb); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h new file mode 100644 index 0000000000..8386f1ea45 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5E_EN_HTB_H_ +#define __MLX5E_EN_HTB_H_ + +#include "qos.h" + +#define MLX5E_QOS_MAX_LEAF_NODES 256 + +struct mlx5e_selq; +struct mlx5e_htb; + +typedef int (*mlx5e_fp_htb_enumerate)(void *data, u16 qid, u32 hw_id); +int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data); + +int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb); + +/* TX datapath API */ +int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid); + +/* HTB TC handlers */ + +int +mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +int +mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid, + struct netlink_ext_ack *extack); +int +mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force, + struct netlink_ext_ack *extack); +int +mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +struct mlx5e_htb *mlx5e_htb_alloc(void); +void mlx5e_htb_free(struct mlx5e_htb *htb); +int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt, + struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_selq *selq, struct mlx5e_priv *priv); +void mlx5e_htb_cleanup(struct mlx5e_htb *htb); +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c new file mode 100644 index 0000000000..b4f3bd7d34 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include "en.h" +#include "en/hv_vhca_stats.h" +#include "lib/hv_vhca.h" +#include "lib/hv.h" + +struct mlx5e_hv_vhca_per_ring_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; +}; + +static void +mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch, + struct mlx5e_hv_vhca_per_ring_stats *data) +{ + struct mlx5e_channel_stats *stats; + int tc; + + stats = priv->channel_stats[ch]; + data->rx_packets = stats->rq.packets; + data->rx_bytes = stats->rq.bytes; + + for (tc = 0; tc < priv->max_opened_tc; tc++) { + data->tx_packets += stats->sq[tc].packets; + data->tx_bytes += stats->sq[tc].bytes; + } +} + +static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, void *data, + int buf_len) +{ + int ch, i = 0; + + for (ch = 0; ch < priv->stats_nch; ch++) { + void *buf = data + i; + + if (WARN_ON_ONCE(buf + + sizeof(struct mlx5e_hv_vhca_per_ring_stats) > + data + buf_len)) + return; + + mlx5e_hv_vhca_fill_ring_stats(priv, ch, buf); + i += sizeof(struct mlx5e_hv_vhca_per_ring_stats); + } +} + +static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv) +{ + return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) * + priv->stats_nch); +} + +static void mlx5e_hv_vhca_stats_work(struct work_struct *work) +{ + struct mlx5e_hv_vhca_stats_agent *sagent; + struct mlx5_hv_vhca_agent *agent; + struct delayed_work *dwork; + struct mlx5e_priv *priv; + int buf_len, rc; + void *buf; + + dwork = to_delayed_work(work); + sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work); + priv = container_of(sagent, struct mlx5e_priv, stats_agent); + buf_len = mlx5e_hv_vhca_stats_buf_size(priv); + agent = sagent->agent; + buf = sagent->buf; + + memset(buf, 0, buf_len); + mlx5e_hv_vhca_fill_stats(priv, buf, buf_len); + + rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len); + if (rc) { + mlx5_core_err(priv->mdev, + "%s: Failed to write stats, err = %d\n", + __func__, rc); + return; + } + + if (sagent->delay) + queue_delayed_work(priv->wq, &sagent->work, sagent->delay); +} + +enum { + MLX5_HV_VHCA_STATS_VERSION = 1, + MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF, +}; + +static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block) +{ + struct mlx5e_hv_vhca_stats_agent *sagent; + struct mlx5e_priv *priv; + + priv = mlx5_hv_vhca_agent_priv(agent); + sagent = &priv->stats_agent; + + block->version = MLX5_HV_VHCA_STATS_VERSION; + block->rings = priv->stats_nch; + + if (!block->command) { + cancel_delayed_work_sync(&priv->stats_agent.work); + return; + } + + sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 : + msecs_to_jiffies(block->command * 100); + + queue_delayed_work(priv->wq, &sagent->work, sagent->delay); +} + +static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent); + + cancel_delayed_work_sync(&priv->stats_agent.work); +} + +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +{ + int buf_len = mlx5e_hv_vhca_stats_buf_size(priv); + struct mlx5_hv_vhca_agent *agent; + + priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL); + if (!priv->stats_agent.buf) + return; + + agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca, + MLX5_HV_VHCA_AGENT_STATS, + mlx5e_hv_vhca_stats_control, NULL, + mlx5e_hv_vhca_stats_cleanup, + priv); + + if (IS_ERR_OR_NULL(agent)) { + if (IS_ERR(agent)) + netdev_warn(priv->netdev, + "Failed to create hv vhca stats agent, err = %ld\n", + PTR_ERR(agent)); + + kvfree(priv->stats_agent.buf); + return; + } + + priv->stats_agent.agent = agent; + INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work); +} + +void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) +{ + if (IS_ERR_OR_NULL(priv->stats_agent.agent)) + return; + + mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent); + kvfree(priv->stats_agent.buf); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h new file mode 100644 index 0000000000..29c8c6d326 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_STATS_VHCA_H__ +#define __MLX5_EN_STATS_VHCA_H__ +#include "en.h" + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); +void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv); + +#else +static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {} +static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {} +#endif + +#endif /* __MLX5_EN_STATS_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c new file mode 100644 index 0000000000..4e72ca8070 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include +#include +#include +#include +#include + +#include "mapping.h" + +#define MAPPING_GRACE_PERIOD 2000 + +static LIST_HEAD(shared_ctx_list); +static DEFINE_MUTEX(shared_ctx_lock); + +struct mapping_ctx { + struct xarray xarray; + DECLARE_HASHTABLE(ht, 8); + struct mutex lock; /* Guards hashtable and xarray */ + unsigned long max_id; + size_t data_size; + bool delayed_removal; + struct delayed_work dwork; + struct list_head pending_list; + spinlock_t pending_list_lock; /* Guards pending list */ + u64 id; + u8 type; + struct list_head list; + refcount_t refcount; +}; + +struct mapping_item { + struct rcu_head rcu; + struct list_head list; + unsigned long timeout; + struct hlist_node node; + int cnt; + u32 id; + char data[]; +}; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id) +{ + struct mapping_item *mi; + int err = -ENOMEM; + u32 hash_key; + + mutex_lock(&ctx->lock); + + hash_key = jhash(data, ctx->data_size, 0); + hash_for_each_possible(ctx->ht, mi, node, hash_key) { + if (!memcmp(data, mi->data, ctx->data_size)) + goto attach; + } + + mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL); + if (!mi) + goto err_alloc; + + memcpy(mi->data, data, ctx->data_size); + hash_add(ctx->ht, &mi->node, hash_key); + + err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id), + GFP_KERNEL); + if (err) + goto err_assign; +attach: + ++mi->cnt; + *id = mi->id; + + mutex_unlock(&ctx->lock); + + return 0; + +err_assign: + hash_del(&mi->node); + kfree(mi); +err_alloc: + mutex_unlock(&ctx->lock); + + return err; +} + +static void mapping_remove_and_free(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + xa_erase(&ctx->xarray, mi->id); + kfree_rcu(mi, rcu); +} + +static void mapping_free_item(struct mapping_ctx *ctx, + struct mapping_item *mi) +{ + if (!ctx->delayed_removal) { + mapping_remove_and_free(ctx, mi); + return; + } + + mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD); + + spin_lock(&ctx->pending_list_lock); + list_add_tail(&mi->list, &ctx->pending_list); + spin_unlock(&ctx->pending_list_lock); + + schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD); +} + +int mapping_remove(struct mapping_ctx *ctx, u32 id) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + mutex_lock(&ctx->lock); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto out; + err = 0; + + if (--mi->cnt > 0) + goto out; + + hash_del(&mi->node); + mapping_free_item(ctx, mi); +out: + mutex_unlock(&ctx->lock); + + return err; +} + +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data) +{ + unsigned long index = id; + struct mapping_item *mi; + int err = -ENOENT; + + rcu_read_lock(); + mi = xa_load(&ctx->xarray, index); + if (!mi) + goto err_find; + + memcpy(data, mi->data, ctx->data_size); + err = 0; + +err_find: + rcu_read_unlock(); + return err; +} + +static void +mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list) +{ + struct mapping_item *mi; + + list_for_each_entry(mi, list, list) + mapping_remove_and_free(ctx, mi); +} + +static void mapping_work_handler(struct work_struct *work) +{ + unsigned long min_timeout = 0, now = jiffies; + struct mapping_item *mi, *next; + LIST_HEAD(pending_items); + struct mapping_ctx *ctx; + + ctx = container_of(work, struct mapping_ctx, dwork.work); + + spin_lock(&ctx->pending_list_lock); + list_for_each_entry_safe(mi, next, &ctx->pending_list, list) { + if (time_after(now, mi->timeout)) + list_move(&mi->list, &pending_items); + else if (!min_timeout || + time_before(mi->timeout, min_timeout)) + min_timeout = mi->timeout; + } + spin_unlock(&ctx->pending_list_lock); + + mapping_remove_and_free_list(ctx, &pending_items); + + if (min_timeout) + schedule_delayed_work(&ctx->dwork, abs(min_timeout - now)); +} + +static void mapping_flush_work(struct mapping_ctx *ctx) +{ + if (!ctx->delayed_removal) + return; + + cancel_delayed_work_sync(&ctx->dwork); + mapping_remove_and_free_list(ctx, &ctx->pending_list); +} + +struct mapping_ctx * +mapping_create(size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->max_id = max_id ? max_id : UINT_MAX; + ctx->data_size = data_size; + + if (delayed_removal) { + INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler); + INIT_LIST_HEAD(&ctx->pending_list); + spin_lock_init(&ctx->pending_list_lock); + ctx->delayed_removal = true; + } + + mutex_init(&ctx->lock); + xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + + refcount_set(&ctx->refcount, 1); + INIT_LIST_HEAD(&ctx->list); + + return ctx; +} + +struct mapping_ctx * +mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + mutex_lock(&shared_ctx_lock); + list_for_each_entry(ctx, &shared_ctx_list, list) { + if (ctx->id == id && ctx->type == type) { + if (refcount_inc_not_zero(&ctx->refcount)) + goto unlock; + break; + } + } + + ctx = mapping_create(data_size, max_id, delayed_removal); + if (IS_ERR(ctx)) + goto unlock; + + ctx->id = id; + ctx->type = type; + list_add(&ctx->list, &shared_ctx_list); + +unlock: + mutex_unlock(&shared_ctx_lock); + return ctx; +} + +void mapping_destroy(struct mapping_ctx *ctx) +{ + if (!refcount_dec_and_test(&ctx->refcount)) + return; + + mutex_lock(&shared_ctx_lock); + list_del(&ctx->list); + mutex_unlock(&shared_ctx_lock); + + mapping_flush_work(ctx); + xa_destroy(&ctx->xarray); + mutex_destroy(&ctx->lock); + + kfree(ctx); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h new file mode 100644 index 0000000000..4e2119f0f4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_MAPPING_H__ +#define __MLX5_MAPPING_H__ + +struct mapping_ctx; + +int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id); +int mapping_remove(struct mapping_ctx *ctx, u32 id); +int mapping_find(struct mapping_ctx *ctx, u32 id, void *data); + +/* mapping uses an xarray to map data to ids in add(), and for find(). + * For locking, it uses a internal xarray spin lock for add()/remove(), + * find() uses rcu_read_lock(). + * Choosing delayed_removal postpones the removal of a previously mapped + * id by MAPPING_GRACE_PERIOD milliseconds. + * This is to avoid races against hardware, where we mark the packet in + * hardware with a previous id, and quick remove() and add() reusing the same + * previous id. Then find() will get the new mapping instead of the old + * which was used to mark the packet. + */ +struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, + bool delayed_removal); +void mapping_destroy(struct mapping_ctx *ctx); + +/* adds mapping with an id or get an existing mapping with the same id + */ +struct mapping_ctx * +mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal); + +#endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c new file mode 100644 index 0000000000..cf60f0a3ff --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies + +#include +#include "mod_hdr.h" + +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) + +struct mod_hdr_key { + int num_actions; + void *actions; +}; + +struct mlx5e_mod_hdr_handle { + /* a node of a hash table which keeps all the mod_hdr entries */ + struct hlist_node mod_hdr_hlist; + + struct mod_hdr_key key; + + struct mlx5_modify_hdr *modify_hdr; + + refcount_t refcnt; + struct completion res_ready; + int compl_result; +}; + +static u32 hash_mod_hdr_info(struct mod_hdr_key *key) +{ + return jhash(key->actions, + key->num_actions * MLX5_MH_ACT_SZ, 0); +} + +static int cmp_mod_hdr_info(struct mod_hdr_key *a, struct mod_hdr_key *b) +{ + if (a->num_actions != b->num_actions) + return 1; + + return memcmp(a->actions, b->actions, + a->num_actions * MLX5_MH_ACT_SZ); +} + +void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl) +{ + mutex_init(&tbl->lock); + hash_init(tbl->hlist); +} + +void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl) +{ + WARN_ON(!hash_empty(tbl->hlist)); + mutex_destroy(&tbl->lock); +} + +static struct mlx5e_mod_hdr_handle *mod_hdr_get(struct mod_hdr_tbl *tbl, + struct mod_hdr_key *key, + u32 hash_key) +{ + struct mlx5e_mod_hdr_handle *mh, *found = NULL; + + hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, key)) { + refcount_inc(&mh->refcnt); + found = mh; + break; + } + } + + return found; +} + +struct mlx5e_mod_hdr_handle * +mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev, + struct mod_hdr_tbl *tbl, + enum mlx5_flow_namespace_type namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + int num_actions, actions_size, err; + struct mlx5e_mod_hdr_handle *mh; + struct mod_hdr_key key; + u32 hash_key; + + num_actions = mod_hdr_acts->num_actions; + actions_size = MLX5_MH_ACT_SZ * num_actions; + + key.actions = mod_hdr_acts->actions; + key.num_actions = num_actions; + + hash_key = hash_mod_hdr_info(&key); + + mutex_lock(&tbl->lock); + mh = mod_hdr_get(tbl, &key, hash_key); + if (mh) { + mutex_unlock(&tbl->lock); + wait_for_completion(&mh->res_ready); + + if (mh->compl_result < 0) { + err = -EREMOTEIO; + goto attach_header_err; + } + goto attach_header; + } + + mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); + if (!mh) { + mutex_unlock(&tbl->lock); + return ERR_PTR(-ENOMEM); + } + + mh->key.actions = (void *)mh + sizeof(*mh); + memcpy(mh->key.actions, key.actions, actions_size); + mh->key.num_actions = num_actions; + refcount_set(&mh->refcnt, 1); + init_completion(&mh->res_ready); + + hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key); + mutex_unlock(&tbl->lock); + + mh->modify_hdr = mlx5_modify_header_alloc(mdev, namespace, + mh->key.num_actions, + mh->key.actions); + if (IS_ERR(mh->modify_hdr)) { + err = PTR_ERR(mh->modify_hdr); + mh->compl_result = err; + goto alloc_header_err; + } + mh->compl_result = 1; + complete_all(&mh->res_ready); + +attach_header: + return mh; + +alloc_header_err: + complete_all(&mh->res_ready); +attach_header_err: + mlx5e_mod_hdr_detach(mdev, tbl, mh); + return ERR_PTR(err); +} + +void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev, + struct mod_hdr_tbl *tbl, + struct mlx5e_mod_hdr_handle *mh) +{ + if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock)) + return; + hash_del(&mh->mod_hdr_hlist); + mutex_unlock(&tbl->lock); + + if (mh->compl_result > 0) + mlx5_modify_header_dealloc(mdev, mh->modify_hdr); + + kfree(mh); +} + +struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh) +{ + return mh->modify_hdr; +} + +char * +mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + int new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; + + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + goto out; + + max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return ERR_PTR(-ENOSPC); + + new_sz = MLX5_MH_ACT_SZ * new_num_actions; + old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ; + + if (mod_hdr_acts->is_static) { + ret = kzalloc(new_sz, GFP_KERNEL); + if (ret) { + memcpy(ret, mod_hdr_acts->actions, old_sz); + mod_hdr_acts->is_static = false; + } + } else { + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (ret) + memset(ret + old_sz, 0, new_sz - old_sz); + } + if (!ret) + return ERR_PTR(-ENOMEM); + + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + +out: + return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); +} + +void +mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + if (!mod_hdr_acts->is_static) + kfree(mod_hdr_acts->actions); + + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + +char * +mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos) +{ + return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h new file mode 100644 index 0000000000..b8dac418d0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies */ + +#ifndef __MLX5E_EN_MOD_HDR_H__ +#define __MLX5E_EN_MOD_HDR_H__ + +#include +#include + +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) + +struct mlx5e_mod_hdr_handle; + +struct mlx5e_tc_mod_hdr_acts { + int num_actions; + int max_actions; + bool is_static; + void *actions; +}; + +#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \ + u8 name[len][MLX5_MH_ACT_SZ] = {} + +#define DECLARE_MOD_HDR_ACTS(name, acts_arr) \ + struct mlx5e_tc_mod_hdr_acts name = { \ + .max_actions = ARRAY_SIZE(acts_arr), \ + .is_static = true, \ + .actions = acts_arr, \ + } + +char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos); + +struct mlx5e_mod_hdr_handle * +mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev, + struct mod_hdr_tbl *tbl, + enum mlx5_flow_namespace_type namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void mlx5e_mod_hdr_detach(struct mlx5_core_dev *mdev, + struct mod_hdr_tbl *tbl, + struct mlx5e_mod_hdr_handle *mh); +struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh); + +void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl); +void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl); + +static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace) +{ + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); +} + +#endif /* __MLX5E_EN_MOD_HDR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c new file mode 100644 index 0000000000..254c847390 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include "en.h" +#include "monitor_stats.h" +#include "lib/eq.h" + +/* Driver will set the following watch counters list: + * Ppcnt.802_3: + * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A + * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A + * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A + * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A + * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A + * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A + * Q_Counters: + * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix + */ + +#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 +#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1 + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters)) + return false; + if (MLX5_CAP_PCAM_REG(mdev, ppcnt) && + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) < + NUM_REQ_PPCNT_COUNTER_S1) + return false; + if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) < + NUM_REQ_Q_COUNTERS_S1) + return false; + return true; +} + +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {}; + + MLX5_SET(arm_monitor_counter_in, in, opcode, + MLX5_CMD_OP_ARM_MONITOR_COUNTER); + mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in); +} + +static void mlx5e_monitor_counters_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + monitor_counters_work); + + mutex_lock(&priv->state_lock); + mlx5e_stats_update_ndo_stats(priv); + mutex_unlock(&priv->state_lock); + mlx5e_monitor_counter_arm(priv); +} + +static int mlx5e_monitor_event_handler(struct notifier_block *nb, + unsigned long event, void *eqe) +{ + struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv, + monitor_counters_nb); + queue_work(priv->wq, &priv->monitor_counters_work); + return NOTIFY_OK; +} + +static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in) +{ + enum mlx5_monitor_counter_ppcnt ppcnt_cnt; + + for (ppcnt_cnt = 0; + ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1; + ppcnt_cnt++, cnt++) { + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + ppcnt_cnt); + } + return ppcnt_cnt; +} + +static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in) +{ + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter_group_id, + q_counter); + return 1; +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters); + int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters); + int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 : + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters); + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + int q_counter = priv->q_counter; + int cnt = 0; + + if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 && + max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt)) + cnt += fill_monitor_counter_ppcnt_set1(cnt, in); + + if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 && + max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) && + q_counter) + cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in); + + MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt); + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec_in(mdev, set_monitor_counter, in); +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv) +{ + INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work); + MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler, + MONITOR_COUNTER); + mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb); + + mlx5e_set_monitor_counter(priv); + mlx5e_monitor_counter_arm(priv); + queue_work(priv->wq, &priv->update_stats_work); +} + +/* check if mlx5e_monitor_counter_supported before calling this function*/ +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in); + mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb); + cancel_work_sync(&priv->monitor_counters_work); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h new file mode 100644 index 0000000000..e1ac4b3d22 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_MONITOR_H__ +#define __MLX5_MONITOR_H__ + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv); + +#endif /* __MLX5_MONITOR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c new file mode 100644 index 0000000000..30507b7c2f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -0,0 +1,1312 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "en/params.h" +#include "en/txrx.h" +#include "en/port.h" +#include "en_accel/en_accel.h" +#include "en_accel/ipsec.h" +#include +#include + +static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) +{ + u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size); + + return min_page_shift ? : 12; +} + +u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +{ + u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT; + u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev); + + /* Regular RQ uses order-0 pages, the NIC must be able to map them. */ + if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift)) + min_page_shift = req_page_shift; + + return max(req_page_shift, min_page_shift); +} + +enum mlx5e_mpwrq_umr_mode +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) +{ + /* Different memory management schemes use different mechanisms to map + * user-mode memory. The stricter guarantees we have, the faster + * mechanisms we use: + * 1. MTT - direct mapping in page granularity. + * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but + * all mappings have the same size. + * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and + * mappings can have different sizes. + */ + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + bool unaligned = xsk ? xsk->unaligned : false; + bool oversized = false; + + if (xsk) { + oversized = xsk->chunk_size < (1 << page_shift); + WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift)); + } + + /* XSK frame size doesn't match the UMR page size, either because the + * frame size is not a power of two, or it's smaller than the minimal + * page size supported by the firmware. + * It's possible to receive packets bigger than MTU in certain setups. + * To avoid writing over the XSK frame boundary, the top region of each + * stride is mapped to a garbage page, resulting in two mappings of + * different sizes per frame. + */ + if (oversized) { + /* An optimization for frame sizes equal to 3 * power_of_two. + * 3 KSMs point to the frame, and one KSM points to the garbage + * page, which works faster than KLM. + */ + if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3)) + return MLX5E_MPWRQ_UMR_MODE_TRIPLE; + + return MLX5E_MPWRQ_UMR_MODE_OVERSIZED; + } + + /* XSK frames can start at arbitrary unaligned locations, but they all + * have the same size which is a power of two. It allows to optimize to + * one KSM per frame. + */ + if (unaligned) + return MLX5E_MPWRQ_UMR_MODE_UNALIGNED; + + /* XSK: frames are naturally aligned, MTT can be used. + * Non-XSK: Allocations happen in units of CPU pages, therefore, the + * mappings are naturally aligned. + */ + return MLX5E_MPWRQ_UMR_MODE_ALIGNED; +} + +u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) +{ + switch (mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return sizeof(struct mlx5_mtt); + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return sizeof(struct mlx5_ksm); + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + return sizeof(struct mlx5_klm) * 2; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + return sizeof(struct mlx5_ksm) * 4; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); + return 0; +} + +u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u8 max_pages_per_wqe, max_log_mpwqe_size; + u16 max_wqe_size; + + /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */ + max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; + max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe), + MLX5_UMR_FLEX_ALIGNMENT) / umr_entry_size; + max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift; + + WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU); + + return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ); +} + +u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); + u8 pages_per_wqe; + + pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1; + + /* Two MTTs are needed to form an octword. The number of MTTs is encoded + * in octwords in a UMR WQE, so we need at least two to avoid mapping + * garbage addresses. + */ + if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + pages_per_wqe = 2; + + /* Sanity check for further calculations to succeed. */ + BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64); + if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE)) + return MLX5_MPWRQ_MAX_PAGES_PER_WQE; + + return pages_per_wqe; +} + +u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode); + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u16 umr_wqe_sz; + + umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) + + ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_FLEX_ALIGNMENT); + + WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK); + + return umr_wqe_sz; +} + +u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode), + MLX5_SEND_WQE_BB); +} + +u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode); + + /* Add another page as a buffer between WQEs. This page will absorb + * write overflow by the hardware, when receiving packets larger than + * MTU. These oversize packets are dropped by the driver at a later + * stage. + */ + return ALIGN(pages_per_wqe + 1, + MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode)); +} + +u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + /* Same limits apply to KSMs and KLMs. */ + u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS, + 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5E_MAX_RQ_NUM_MTTS; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return klm_limit; + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + /* Each entry is two KLMs. */ + return klm_limit / 2; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + /* Each entry is four KSMs. */ + return klm_limit / 4; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; +} + +static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode); + u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode); + + return ilog2(max_entries / mtts_per_wqe); +} + +u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) + + mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - + MLX5E_ORDER2_MAX_PACKET_MTU; +} + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 headroom; + + if (xsk) + return xsk->headroom; + + headroom = NET_IP_ALIGN; + if (params->xdp_prog) + headroom += XDP_PACKET_HEADROOM; + else + headroom += MLX5_RX_HEADROOM; + + return headroom; +} + +static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + + return xsk->headroom + hw_mtu; +} + +static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk) +{ + /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */ + u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL); + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + + return MLX5_SKB_FRAG_SZ(headroom + hw_mtu); +} + +static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + bool mpwqe) +{ + u32 sz; + + /* XSK frames are mapped as individual pages, because frames may come in + * an arbitrary order from random locations in the UMEM. + */ + if (xsk) + return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE; + + sz = roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); + + /* XDP in mlx5e doesn't support multiple packets per page. + * Do not assume sz <= PAGE_SIZE if params->xdp_prog is set. + */ + return params->xdp_prog && sz < PAGE_SIZE ? PAGE_SIZE : sz; +} + +static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - + order_base_2(linear_stride_sz); +} + +bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) + return false; + + /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data + * must fit into a CPU page. + */ + if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE) + return false; + + /* XSK frames must be big enough to hold the packet data. */ + if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size) + return false; + + return true; +} + +static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + u8 log_stride_sz, u8 log_num_strides, + u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + if (log_stride_sz + log_num_strides != + mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode)) + return false; + + if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE || + log_stride_sz > MLX5_MPWQE_LOG_STRIDE_SZ_MAX) + return false; + + if (log_num_strides > MLX5_MPWQE_LOG_NUM_STRIDES_MAX) + return false; + + if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) + return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE; + + return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE; +} + +bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + return mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, + log_wqe_num_of_strides, + page_shift, umr_mode); +} + +bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 log_num_strides; + u8 log_stride_sz; + u8 log_wqe_sz; + + if (!mlx5e_rx_is_linear_skb(mdev, params, xsk)) + return false; + + log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); + + if (log_wqe_sz < log_stride_sz) + return false; + + log_num_strides = log_wqe_sz - log_stride_sz; + + return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, + log_num_strides, page_shift, + umr_mode); +} + +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 log_pkts_per_wqe, page_shift, max_log_rq_size; + + log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk); + page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode); + + /* Numbers are unsigned, don't subtract to avoid underflow. */ + if (params->log_rq_mtu_frames < + log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW) + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + + /* Ethtool's rx_max_pending is calculated for regular RQ, that uses + * pages of PAGE_SIZE. Max length of an XSK RQ might differ if it uses a + * frame size not equal to PAGE_SIZE. + * A stricter condition is checked in mlx5e_mpwrq_validate_xsk, WARN on + * unexpected failure. + */ + if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size)) + return max_log_rq_size; + + return params->log_rq_mtu_frames - log_pkts_per_wqe; +} + +u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE)); +} + +u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE); +} + +u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * + PAGE_SIZE; + + return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu)); +} + +u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true)); + + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (params->xdp_prog) + return PAGE_SHIFT; + + return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); +} + +u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 log_wqe_size, log_stride_size; + + log_wqe_size = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); + log_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + WARN(log_wqe_size < log_stride_size, + "Log WQE size %u < log stride size %u (page shift %u, umr mode %d, xsk on? %d)\n", + log_wqe_size, log_stride_size, page_shift, umr_mode, !!xsk); + return log_wqe_size - log_stride_size; +} + +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz) +{ +#define UMR_WQE_BULK (2) + return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1); +} + +u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + + if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) + return linear_headroom; + + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return linear_headroom; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + return linear_headroom; + + return 0; +} + +u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + u16 stop_room; + + stop_room = mlx5e_ktls_get_stop_room(mdev, params); + stop_room += mlx5e_stop_room_for_max_wqe(mdev); + if (is_mpwqe) + /* A MPWQE can take up to the maximum cacheline-aligned WQE + + * all the normal stop room can be taken if a new packet breaks + * the active MPWQE session and allocates its WQEs right away. + */ + stop_room += mlx5e_stop_room_for_mpwqe(mdev); + + return stop_room; +} + +int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + size_t sq_size = 1 << params->log_sq_size; + u16 stop_room; + + stop_room = mlx5e_calc_sq_stop_room(mdev, params); + if (stop_room >= sq_size) { + mlx5_core_err(mdev, "Stop room %u is bigger than the SQ size %zu\n", + stop_room, sq_size); + return -EINVAL; + } + + return 0; +} + +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +{ + struct dim_cq_moder moder = {}; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +{ + struct dim_cq_moder moder = {}; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) +{ + return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->tx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); + } else { + params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); + } +} + +void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->rx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); + } else { + params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); + } +} + +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + mlx5e_reset_tx_moderation(params, cq_period_mode); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, + params->tx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + mlx5e_reset_rx_moderation(params, cq_period_mode); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + params->rx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +bool slow_pci_heuristic(struct mlx5_core_dev *mdev) +{ + u32 link_speed = 0; + u32 pci_bw = 0; + + mlx5_port_max_linkspeed(mdev, &link_speed); + pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + +#define MLX5E_SLOW_PCI_RATIO (2) + + return link_speed && pci_bw && + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; +} + +int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL); + + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) + return -EOPNOTSUPP; + + return 0; +} + +int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u16 max_mtu_pkts; + + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) { + mlx5_core_err(mdev, "Striding RQ for XSK can't be activated with page_shift %u and umr_mode %d\n", + page_shift, umr_mode); + return -EOPNOTSUPP; + } + + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) { + mlx5_core_err(mdev, "Striding RQ linear mode for XSK can't be activated with current params\n"); + return -EINVAL; + } + + /* Current RQ length is too big for the given frame size, the + * needed number of WQEs exceeds the maximum. + */ + max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE, + mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, xsk->unaligned)); + if (params->log_rq_mtu_frames > max_mtu_pkts) { + mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n", + 1 << params->log_rq_mtu_frames, xsk->chunk_size); + return -EINVAL; + } + + return 0; +} + +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; +} + +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_CYCLIC; +} + +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Prefer Striding RQ, unless any of the following holds: + * - Striding RQ configuration is not possible/supported. + * - CQE compression is ON, and stride_index mini_cqe layout is not supported. + * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + * + * No XSK params: checking the availability of striding RQ in general. + */ + if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) || + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) && + !mlx5e_mpwrq_validate_regular(mdev, params) && + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(mdev, params, NULL))) + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); +} + +/* Build queue parameters */ + +void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c) +{ + *ccp = (struct mlx5e_create_cq_param) { + .napi = &c->napi, + .ch_stats = c->stats, + .node = cpu_to_node(c->cpu), + .ix = c->ix, + }; +} + +static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp) +{ + if (xdp) + /* XDP requires all fragments to be of the same size. */ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size; + + /* Optimization for small packets: the last fragment is bigger than the others. */ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; +} + +static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, + struct mlx5e_rq_frags_info *info) +{ + u16 bulk_bound_rq_size = (1 << params->log_rq_mtu_frames) / 4; + u32 bulk_bound_rq_size_in_bytes; + u32 sum_frag_strides = 0; + u32 wqe_bulk_in_bytes; + u16 split_factor; + u32 wqe_bulk; + int i; + + for (i = 0; i < info->num_frags; i++) + sum_frag_strides += info->arr[i].frag_stride; + + /* For MTUs larger than PAGE_SIZE, align to PAGE_SIZE to reflect + * amount of consumed pages per wqe in bytes. + */ + if (sum_frag_strides > PAGE_SIZE) + sum_frag_strides = ALIGN(sum_frag_strides, PAGE_SIZE); + + bulk_bound_rq_size_in_bytes = bulk_bound_rq_size * sum_frag_strides; + +#define MAX_WQE_BULK_BYTES(xdp) ((xdp ? 256 : 512) * 1024) + + /* A WQE bulk should not exceed min(512KB, 1/4 of rq size). For XDP + * keep bulk size smaller to avoid filling the page_pool cache on + * every bulk refill. + */ + wqe_bulk_in_bytes = min_t(u32, MAX_WQE_BULK_BYTES(params->xdp_prog), + bulk_bound_rq_size_in_bytes); + wqe_bulk = DIV_ROUND_UP(wqe_bulk_in_bytes, sum_frag_strides); + + /* Make sure that allocations don't start when the page is still used + * by older WQEs. + */ + info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk); + + split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog), + PP_ALLOC_CACHE_REFILL * PAGE_SIZE); + info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor); +} + +#define DEFAULT_FRAG_SIZE (2048) + +static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_frags_info *info, + u32 *xdp_frag_size) +{ + u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); + int frag_size_max = DEFAULT_FRAG_SIZE; + int first_frag_size_max; + u32 buf_size = 0; + u16 headroom; + int max_mtu; + int i; + + if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) { + int frag_stride; + + frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false); + + info->arr[0].frag_size = byte_count; + info->arr[0].frag_stride = frag_stride; + info->num_frags = 1; + + /* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The + * first WQE in the page is responsible for allocation of this + * page, this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are + * still not completed, the allocation must stop before k*N. + */ + info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1; + + goto out; + } + + headroom = mlx5e_get_linear_rq_headroom(params, xsk); + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, + params->xdp_prog); + if (byte_count > max_mtu || params->xdp_prog) { + frag_size_max = PAGE_SIZE; + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max, + params->xdp_prog); + if (byte_count > max_mtu) { + mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n", + params->sw_mtu, max_mtu); + return -EINVAL; + } + } + + i = 0; + while (buf_size < byte_count) { + int frag_size = byte_count - buf_size; + + if (i == 0) + frag_size = min(frag_size, first_frag_size_max); + else if (i < MLX5E_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, frag_size_max); + + info->arr[i].frag_size = frag_size; + buf_size += frag_size; + + if (params->xdp_prog) { + /* XDP multi buffer expects fragments of the same size. */ + info->arr[i].frag_stride = frag_size_max; + } else { + if (i == 0) { + /* Ensure that headroom and tailroom are included. */ + frag_size += headroom; + frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); + } + + i++; + } + info->num_frags = i; + + /* The last fragment of WQE with index 2*N may share the page with the + * first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1 + * is not completed yet, WQE 2*N must not be allocated, as it's + * responsible for allocating a new page. + */ + if (frag_size_max == PAGE_SIZE) { + /* No WQE can start in the middle of a page. */ + info->wqe_index_mask = 0; + } else { + /* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments, + * because there would be more than MLX5E_MAX_RX_FRAGS of them. + */ + WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE); + + /* Odd number of fragments allows to pack the last fragment of + * the previous WQE and the first fragment of the next WQE into + * the same page. + * As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS + * is 4, the last fragment can be bigger than the rest only if + * it's the fourth one, so WQEs consisting of 3 fragments will + * always share a page. + * When a page is shared, WQE bulk size is 2, otherwise just 1. + */ + info->wqe_index_mask = info->num_frags % 2; + } + +out: + /* Bulking optimization to skip allocation until a large enough number + * of WQEs can be allocated in a row. Bulking also influences how well + * deferred page release works. + */ + mlx5e_rx_compute_wqe_bulk_params(params, info); + + mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n", + __func__, info->wqe_bulk, info->refill_unit); + + info->log_num_frags = order_base_2(info->num_frags); + + *xdp_frag_size = info->num_frags > 1 && params->xdp_prog ? PAGE_SIZE : 0; + + return 0; +} + +static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) +{ + int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; + + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + sz += sizeof(struct mlx5e_rx_wqe_ll); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + sz += sizeof(struct mlx5e_rx_wqe_cyc); + } + + return order_base_2(sz); +} + +static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); +} + +static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; + u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); + int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); + int wqe_size = BIT(log_stride_sz) * num_strides; + + /* +1 is for the case that the pkt_per_rsrv dont consume the reservation + * so we get a filler cqe for the rest of the reservation. + */ + return order_base_2((wqe_size / rsrv_size) * wq_size * (pkt_per_rsrv + 1)); +} + +static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) +{ + bool hw_stridx = false; + void *cqc = param->cqc; + u8 log_cq_size; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk); + else + log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + log_cq_size = params->log_rq_mtu_frames; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? + MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, cqe_compression_layout, + MLX5_CAP_GEN(mdev, enhanced_cqe_compression) ? + MLX5_CQE_COMPRESS_LAYOUT_ENHANCED : + MLX5_CQE_COMPRESS_LAYOUT_BASIC); + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + } + + mlx5e_build_common_cq_param(mdev, param); + param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; +} + +static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO; + bool ro = MLX5_CAP_GEN(mdev, relaxed_ordering_write); + + return ro && lro_en ? + MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; +} + +int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + int ndsegs = 1; + int err; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { + u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + + if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, + log_wqe_num_of_strides, + page_shift, umr_mode)) { + mlx5_core_err(mdev, + "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n", + log_wqe_stride_size, log_wqe_num_of_strides, + umr_mode); + return -EINVAL; + } + + MLX5_SET(wq, wq, log_wqe_num_of_strides, + log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); + MLX5_SET(wq, wq, log_wqe_stride_size, + log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + MLX5_SET(wq, wq, shampo_enable, true); + MLX5_SET(wq, wq, log_reservation_size, + mlx5e_shampo_get_log_rsrv_size(mdev, params)); + MLX5_SET(wq, wq, + log_max_num_of_packets_per_reservation, + mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + MLX5_SET(wq, wq, log_headers_entry_size, + mlx5e_shampo_get_log_hd_entry_size(mdev, params)); + MLX5_SET(rqc, rqc, reservation_timeout, + params->packet_merge.timeout); + MLX5_SET(rqc, rqc, shampo_match_criteria_type, + params->packet_merge.shampo.match_criteria_type); + MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, + params->packet_merge.shampo.alignment_granularity); + } + break; + } + default: /* MLX5_WQ_TYPE_CYCLIC */ + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); + err = mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info, + ¶m->xdp_frag_size); + if (err) + return err; + ndsegs = param->frags_info.num_frags; + } + + MLX5_SET(wq, wq, wq_type, params->rq_wq_type); + MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params)); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); + MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); + MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + mlx5e_build_rx_cq_param(mdev, params, xsk, ¶m->cqp); + + return 0; +} + +void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + u16 q_counter, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); +} + +void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); + + mlx5e_build_common_cq_param(mdev, param); + param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; +} + +void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); +} + +void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + bool allow_swp; + + allow_swp = mlx5_geneve_tx_allowed(mdev) || + (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO); + mlx5e_build_sq_param_common(mdev, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + MLX5_SET(sqc, sqc, allow_swp, allow_swp); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + param->stop_room = mlx5e_calc_sq_stop_room(mdev, params); + mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); +} + +static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); + + mlx5e_build_common_cq_param(mdev, param); + + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +/* This function calculates the maximum number of headers entries that are needed + * per WQE, the formula is based on the size of the reservations and the + * restriction we have about max packets for reservation that is equal to max + * headers per reservation. + */ +u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; + u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL)); + int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL); + int wqe_size = BIT(log_stride_sz) * num_strides; + u32 hd_per_wqe; + + /* Assumption: hd_per_wqe % 8 == 0. */ + hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv; + mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n", + __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv); + return hd_per_wqe; +} + +/* This function calculates the maximum number of headers entries that are needed + * for the WQ, this value is uesed to allocate the header buffer in HW, thus + * must be a pow of 2. + */ +u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); + int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + u32 hd_per_wqe, hd_per_wq; + + hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); + hd_per_wq = roundup_pow_of_two(hd_per_wqe * wq_size); + return hd_per_wq; +} + +static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest; + void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); + int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + u32 wqebbs; + + max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); + max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); + max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; + rest = max_hd_per_wqe % max_klm_per_umr; + wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe; + if (rest) + wqebbs += MLX5E_KLM_UMR_WQEBBS(rest); + wqebbs *= wq_size; + return wqebbs; +} + +static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 umr_wqebbs; + + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); + + return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); +} + +static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp) +{ + u32 wqebbs, total_pages, useful_space; + + /* MLX5_WQ_TYPE_CYCLIC */ + if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + /* UMR WQEs for the regular RQ. */ + wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL); + + /* If XDP program is attached, XSK may be turned on at any time without + * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of + * both regular RQ and XSK RQ. + * + * XSK uses different values of page_shift, and the total number of UMR + * WQEBBs depends on it. This dependency is complex and not monotonic, + * especially taking into consideration that some of the parameters come + * from capabilities. Hence, we have to try all valid values of XSK + * frame size (and page_shift) to find the maximum. + */ + if (params->xdp_prog) { + u32 max_xsk_wqebbs = 0; + u8 frame_shift; + + for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT; + frame_shift <= PAGE_SHIFT; frame_shift++) { + /* The headroom doesn't affect the calculation. */ + struct mlx5e_xsk_param xsk = { + .chunk_size = 1 << frame_shift, + .unaligned = false, + }; + + /* XSK aligned mode. */ + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is a power of two. */ + xsk.unaligned = true; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is not equal to stride size. */ + xsk.chunk_size -= 1; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + + /* XSK unaligned mode, frame size is a triple power of two. */ + xsk.chunk_size = (1 << frame_shift) / 4 * 3; + max_xsk_wqebbs = max(max_xsk_wqebbs, + mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk)); + } + + wqebbs += max_xsk_wqebbs; + } + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp); + + /* UMR WQEs don't cross the page boundary, they are padded with NOPs. + * This padding is always smaller than the max WQE size. That gives us + * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes + * per page. The number of pages is estimated as the total size of WQEs + * divided by the useful space in page, rounding up. If some WQEs don't + * fully fit into the useful space, they can occupy part of the padding, + * which proves this estimation to be correct (reserve enough space). + */ + useful_space = PAGE_SIZE - mlx5e_get_max_sq_wqebbs(mdev) + MLX5_SEND_WQE_BB; + total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space); + wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB); + + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs)); +} + +static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev) +{ + if (mlx5e_is_ktls_rx(mdev)) + return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; +} + +static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); + mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); +} + +static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */ + param->is_tls = mlx5e_is_ktls_rx(mdev); + if (param->is_tls) + param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */ + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); +} + +void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); + param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk); + mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); +} + +int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam) +{ + u8 icosq_log_wq_sz, async_icosq_log_wq_sz; + int err; + + err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq); + if (err) + return err; + + icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(mdev, params, &cparam->rq); + async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); + + mlx5e_build_sq_param(mdev, params, &cparam->txq_sq); + mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq); + mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq); + mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h new file mode 100644 index 0000000000..6800949daf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_PARAMS_H__ +#define __MLX5_EN_PARAMS_H__ + +#include "en.h" + +struct mlx5e_xsk_param { + u16 headroom; + u16 chunk_size; + bool unaligned; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; + u8 cq_period_mode; +}; + +struct mlx5e_rq_param { + struct mlx5e_cq_param cqp; + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + struct mlx5e_rq_frags_info frags_info; + u32 xdp_frag_size; +}; + +struct mlx5e_sq_param { + struct mlx5e_cq_param cqp; + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; + bool is_mpw; + bool is_tls; + bool is_xdp_mb; + u16 stop_room; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param txq_sq; + struct mlx5e_sq_param xdp_sq; + struct mlx5e_sq_param icosq; + struct mlx5e_sq_param async_icosq; +}; + +struct mlx5e_create_sq_param { + struct mlx5_wq_ctrl *wq_ctrl; + u32 cqn; + u32 ts_cqe_to_dest_cqn; + u32 tisn; + u8 tis_lst_sz; + u8 min_inline_mode; +}; + +/* Striding RQ dynamic parameters */ + +u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +enum mlx5e_mpwrq_umr_mode +mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode); +u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); +u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev, + enum mlx5e_mpwrq_umr_mode umr_mode); +u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode); + +/* Parameter calculations */ + +void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); + +bool slow_pci_heuristic(struct mlx5_core_dev *mdev); +int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param); +u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param); +u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz); +u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); + +/* Build queue parameters */ + +void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c); +int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param); +void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + u16 q_counter, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param); +void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param); +void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_cq_param *param); +void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_sq_param *param); +int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam); + +u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +bool mlx5e_verify_params_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); + +static inline void mlx5e_params_print_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d %s)\n", + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? + BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) : + BIT(params->log_rq_mtu_frames), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS), + MLX5_CAP_GEN(mdev, enhanced_cqe_compression) ? + "enhanced" : "basic"); +}; + +#endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c new file mode 100644 index 0000000000..dbe2b19a95 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -0,0 +1,515 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "port.h" + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} + +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap, + &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); + if (ext) + MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} + +int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + struct mlx5_port_eth_proto eproto; + bool force_legacy = false; + bool ext; + int err; + + ext = mlx5_ptys_ext_supported(mdev); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); + if (err) + goto out; + if (ext && !eproto.admin) { + force_legacy = true; + err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto); + if (err) + goto out; + } + *speed = mlx5_port_ptys2speed(mdev, eproto.oper, force_legacy); + if (!(*speed)) + err = -EINVAL; + +out: + return err; +} + +int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out) +{ + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(pbmc_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0); + + kfree(in); + return err; +} + +int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in) +{ + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *out; + int err; + + out = kzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(pbmc_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1); + + kfree(out); + return err; +} + +int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, void *out, int size_out) +{ + u32 in[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + + MLX5_SET(sbpr_reg, in, desc, desc); + MLX5_SET(sbpr_reg, in, dir, dir); + MLX5_SET(sbpr_reg, in, pool, pool_idx); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, size_out, MLX5_REG_SBPR, 0, 0); +} + +int mlx5e_port_set_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, u32 infi_size, u32 size) +{ + u32 out[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + + MLX5_SET(sbpr_reg, in, desc, desc); + MLX5_SET(sbpr_reg, in, dir, dir); + MLX5_SET(sbpr_reg, in, pool, pool_idx); + MLX5_SET(sbpr_reg, in, infi_size, infi_size); + MLX5_SET(sbpr_reg, in, size, size); + MLX5_SET(sbpr_reg, in, mode, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_SBPR, 0, 1); +} + +static int mlx5e_port_query_sbcm(struct mlx5_core_dev *mdev, u32 desc, + u8 pg_buff_idx, u8 dir, void *out, + int size_out) +{ + u32 in[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + + MLX5_SET(sbcm_reg, in, desc, desc); + MLX5_SET(sbcm_reg, in, local_port, 1); + MLX5_SET(sbcm_reg, in, pg_buff, pg_buff_idx); + MLX5_SET(sbcm_reg, in, dir, dir); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, size_out, MLX5_REG_SBCM, 0, 0); +} + +int mlx5e_port_set_sbcm(struct mlx5_core_dev *mdev, u32 desc, u8 pg_buff_idx, + u8 dir, u8 infi_size, u32 max_buff, u8 pool_idx) +{ + u32 out[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + u32 min_buff; + int err; + u8 exc; + + err = mlx5e_port_query_sbcm(mdev, desc, pg_buff_idx, dir, out, + sizeof(out)); + if (err) + return err; + + exc = MLX5_GET(sbcm_reg, out, exc); + min_buff = MLX5_GET(sbcm_reg, out, min_buff); + + MLX5_SET(sbcm_reg, in, desc, desc); + MLX5_SET(sbcm_reg, in, local_port, 1); + MLX5_SET(sbcm_reg, in, pg_buff, pg_buff_idx); + MLX5_SET(sbcm_reg, in, dir, dir); + MLX5_SET(sbcm_reg, in, exc, exc); + MLX5_SET(sbcm_reg, in, min_buff, min_buff); + MLX5_SET(sbcm_reg, in, infi_max, infi_size); + MLX5_SET(sbcm_reg, in, max_buff, max_buff); + MLX5_SET(sbcm_reg, in, pool, pool_idx); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_SBCM, 0, 1); +} + +/* buffer[i]: buffer that priority i mapped to */ +int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) +{ + int sz = MLX5_ST_SZ_BYTES(pptb_reg); + u32 prio_x_buff; + void *out; + void *in; + int prio; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(pptb_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0); + if (err) + goto out; + + prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff); + for (prio = 0; prio < 8; prio++) { + buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF; + mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]); + } +out: + kfree(in); + kfree(out); + return err; +} + +int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) +{ + int sz = MLX5_ST_SZ_BYTES(pptb_reg); + u32 prio_x_buff; + void *out; + void *in; + int prio; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + /* First query the pptb register */ + MLX5_SET(pptb_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(pptb_reg, in, local_port, 1); + + /* Update the pm and prio_x_buff */ + MLX5_SET(pptb_reg, in, pm, 0xFF); + + prio_x_buff = 0; + for (prio = 0; prio < 8; prio++) + prio_x_buff |= (buffer[prio] << (4 * prio)); + MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff); + + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1); + +out: + kfree(in); + kfree(out); + return err; +} + +enum mlx5e_fec_supported_link_mode { + MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G, + MLX5E_FEC_SUPPORTED_LINK_MODES_25G, + MLX5E_FEC_SUPPORTED_LINK_MODES_50G, + MLX5E_FEC_SUPPORTED_LINK_MODES_56G, + MLX5E_FEC_SUPPORTED_LINK_MODES_100G, + MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X, + MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, +}; + +#define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X + +#define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ + do { \ + u16 *_policy = &(policy); \ + u32 *_buf = buf; \ + \ + if (write) \ + MLX5_SET(pplm_reg, _buf, fec_override_admin_##link, *_policy); \ + else \ + *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ + } while (0) + +/* get/set FEC admin field for a given speed */ +static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, + enum mlx5e_fec_supported_link_mode link_mode) +{ + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 10g_40g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 25g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 56g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x); + break; + default: + return -EINVAL; + } + return 0; +} + +#define MLX5E_GET_FEC_OVERRIDE_CAP(buf, link) \ + MLX5_GET(pplm_reg, buf, fec_override_cap_##link) + +/* returns FEC capabilities for a given speed */ +static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, + enum mlx5e_fec_supported_link_mode link_mode) +{ + switch (link_mode) { + case MLX5E_FEC_SUPPORTED_LINK_MODES_10G_40G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 10g_40g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_25G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 25g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_50G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_56G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 56g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODES_100G: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 50g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 100g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_8x); + break; + default: + return -EINVAL; + } + return 0; +} + +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) +{ + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + int err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg) || !MLX5_CAP_PCAM_REG(dev, pplm)) + return false; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return false; + + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 fec_caps; + + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + if (fec_caps & fec_policy) + return true; + } + return false; +} + +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u16 *fec_configured_mode) +{ + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + int err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active); + + if (!fec_configured_mode) + goto out; + + *fec_configured_mode = 0; + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + mlx5e_fec_admin_field(out, fec_configured_mode, 0, i); + if (*fec_configured_mode != 0) + goto out; + } +out: + return 0; +} + +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) +{ + bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u16 fec_policy_auto = 0; + int err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane) + return -EOPNOTSUPP; + + if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + MLX5_SET(pplm_reg, out, local_port, 1); + + for (i = 0; i < MLX5E_MAX_FEC_SUPPORTED_LINK_MODE; i++) { + u16 conf_fec = fec_policy; + u16 fec_caps = 0; + + if (i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE && !fec_50g_per_lane) + break; + + /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 + * to link modes up to 25G per lane and to + * MLX5E_FEC_RS_544_514 in the new link modes based on + * 50 G per lane + */ + if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && + i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + conf_fec = (1 << MLX5E_FEC_RS_544_514); + + mlx5e_get_fec_cap_field(out, &fec_caps, i); + + /* policy supported for link speed */ + if (fec_caps & conf_fec) + mlx5e_fec_admin_field(out, &conf_fec, 1, i); + else + /* set FEC to auto*/ + mlx5e_fec_admin_field(out, &fec_policy_auto, 1, i); + } + + return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h new file mode 100644 index 0000000000..d1da225f35 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_EN_PORT_H +#define __MLX5E_EN_PORT_H + +#include +#include "en.h" + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext); +int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); +int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); +int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, void *out, int size_out); +int mlx5e_port_set_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, u32 infi_size, u32 size); +int mlx5e_port_set_sbcm(struct mlx5_core_dev *mdev, u32 desc, u8 pg_buff_idx, + u8 dir, u8 infi_size, u32 max_buff, u8 pool_idx); +int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); +int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); + +bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy); +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u16 *fec_configured_mode); +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy); + +enum { + MLX5E_FEC_NOFEC, + MLX5E_FEC_FIRECODE, + MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514 = 7, + MLX5E_FEC_LLRS_272_257_1 = 9, +}; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c new file mode 100644 index 0000000000..8e25f4ef5c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -0,0 +1,590 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "port_buffer.h" + +int mlx5e_port_query_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer) +{ + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + u32 total_used = 0; + void *buffer; + void *out; + int err; + int i; + + out = kzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5e_port_query_pbmc(mdev, out); + if (err) + goto out; + + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { + buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); + port_buffer->buffer[i].lossy = + MLX5_GET(bufferx_reg, buffer, lossy); + port_buffer->buffer[i].epsb = + MLX5_GET(bufferx_reg, buffer, epsb); + port_buffer->buffer[i].size = + MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz; + port_buffer->buffer[i].xon = + MLX5_GET(bufferx_reg, buffer, xon_threshold) * port_buff_cell_sz; + port_buffer->buffer[i].xoff = + MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz; + total_used += port_buffer->buffer[i].size; + + netdev_dbg(priv->netdev, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", + i, + port_buffer->buffer[i].size, + port_buffer->buffer[i].xon, + port_buffer->buffer[i].xoff, + port_buffer->buffer[i].epsb, + port_buffer->buffer[i].lossy); + } + + port_buffer->internal_buffers_size = 0; + for (i = MLX5E_MAX_NETWORK_BUFFER; i < MLX5E_TOTAL_BUFFERS; i++) { + buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); + port_buffer->internal_buffers_size += + MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz; + } + + port_buffer->port_buffer_size = + MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz; + port_buffer->headroom_size = total_used; + port_buffer->spare_buffer_size = port_buffer->port_buffer_size - + port_buffer->internal_buffers_size - + port_buffer->headroom_size; + + netdev_dbg(priv->netdev, + "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n", + port_buffer->port_buffer_size, port_buffer->headroom_size, + port_buffer->internal_buffers_size, + port_buffer->spare_buffer_size); +out: + kfree(out); + return err; +} + +struct mlx5e_buffer_pool { + u32 infi_size; + u32 size; + u32 buff_occupancy; +}; + +static int mlx5e_port_query_pool(struct mlx5_core_dev *mdev, + struct mlx5e_buffer_pool *buffer_pool, + u32 desc, u8 dir, u8 pool_idx) +{ + u32 out[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + int err; + + err = mlx5e_port_query_sbpr(mdev, desc, dir, pool_idx, out, + sizeof(out)); + if (err) + return err; + + buffer_pool->size = MLX5_GET(sbpr_reg, out, size); + buffer_pool->infi_size = MLX5_GET(sbpr_reg, out, infi_size); + buffer_pool->buff_occupancy = MLX5_GET(sbpr_reg, out, buff_occupancy); + + return err; +} + +enum { + MLX5_INGRESS_DIR = 0, + MLX5_EGRESS_DIR = 1, +}; + +enum { + MLX5_LOSSY_POOL = 0, + MLX5_LOSSLESS_POOL = 1, +}; + +/* No limit on usage of shared buffer pool (max_buff=0) */ +#define MLX5_SB_POOL_NO_THRESHOLD 0 +/* Shared buffer pool usage threshold when calculated + * dynamically in alpha units. alpha=13 is equivalent to + * HW_alpha of [(1/128) * 2 ^ (alpha-1)] = 32, where HW_alpha + * equates to the following portion of the shared buffer pool: + * [32 / (1 + n * 32)] While *n* is the number of buffers + * that are using the shared buffer pool. + */ +#define MLX5_SB_POOL_THRESHOLD 13 + +/* Shared buffer class management parameters */ +struct mlx5_sbcm_params { + u8 pool_idx; + u8 max_buff; + u8 infi_size; +}; + +static const struct mlx5_sbcm_params sbcm_default = { + .pool_idx = MLX5_LOSSY_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 0, +}; + +static const struct mlx5_sbcm_params sbcm_lossy = { + .pool_idx = MLX5_LOSSY_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 1, +}; + +static const struct mlx5_sbcm_params sbcm_lossless = { + .pool_idx = MLX5_LOSSLESS_POOL, + .max_buff = MLX5_SB_POOL_THRESHOLD, + .infi_size = 0, +}; + +static const struct mlx5_sbcm_params sbcm_lossless_no_threshold = { + .pool_idx = MLX5_LOSSLESS_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 1, +}; + +/** + * select_sbcm_params() - selects the shared buffer pool configuration + * + * @buffer: port buffer to retrieve params of + * @lossless_buff_count: number of lossless buffers in total + * + * The selection is based on the following rules: + * 1. If buffer size is 0, no shared buffer pool is used. + * 2. If buffer is lossy, use lossy shared buffer pool. + * 3. If there are more than 1 lossless buffers, use lossless shared buffer pool + * with threshold. + * 4. If there is only 1 lossless buffer, use lossless shared buffer pool + * without threshold. + * + * @return const struct mlx5_sbcm_params* selected values + */ +static const struct mlx5_sbcm_params * +select_sbcm_params(struct mlx5e_bufferx_reg *buffer, u8 lossless_buff_count) +{ + if (buffer->size == 0) + return &sbcm_default; + + if (buffer->lossy) + return &sbcm_lossy; + + if (lossless_buff_count > 1) + return &sbcm_lossless; + + return &sbcm_lossless_no_threshold; +} + +static int port_update_pool_cfg(struct mlx5_core_dev *mdev, + struct mlx5e_port_buffer *port_buffer) +{ + const struct mlx5_sbcm_params *p; + u8 lossless_buff_count = 0; + int err; + int i; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) + lossless_buff_count += ((port_buffer->buffer[i].size) && + (!(port_buffer->buffer[i].lossy))); + + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { + p = select_sbcm_params(&port_buffer->buffer[i], lossless_buff_count); + err = mlx5e_port_set_sbcm(mdev, 0, i, + MLX5_INGRESS_DIR, + p->infi_size, + p->max_buff, + p->pool_idx); + if (err) + return err; + } + + return 0; +} + +static int port_update_shared_buffer(struct mlx5_core_dev *mdev, + u32 current_headroom_size, + u32 new_headroom_size) +{ + struct mlx5e_buffer_pool lossless_ipool; + struct mlx5e_buffer_pool lossy_epool; + u32 lossless_ipool_size; + u32 shared_buffer_size; + u32 total_buffer_size; + u32 lossy_epool_size; + int err; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + err = mlx5e_port_query_pool(mdev, &lossy_epool, 0, MLX5_EGRESS_DIR, + MLX5_LOSSY_POOL); + if (err) + return err; + + err = mlx5e_port_query_pool(mdev, &lossless_ipool, 0, MLX5_INGRESS_DIR, + MLX5_LOSSLESS_POOL); + if (err) + return err; + + total_buffer_size = current_headroom_size + lossy_epool.size + + lossless_ipool.size; + shared_buffer_size = total_buffer_size - new_headroom_size; + + if (shared_buffer_size < 4) { + pr_err("Requested port buffer is too large, not enough space left for shared buffer\n"); + return -EINVAL; + } + + /* Total shared buffer size is split in a ratio of 3:1 between + * lossy and lossless pools respectively. + */ + lossy_epool_size = (shared_buffer_size / 4) * 3; + lossless_ipool_size = shared_buffer_size / 4; + + mlx5e_port_set_sbpr(mdev, 0, MLX5_EGRESS_DIR, MLX5_LOSSY_POOL, 0, + lossy_epool_size); + mlx5e_port_set_sbpr(mdev, 0, MLX5_INGRESS_DIR, MLX5_LOSSLESS_POOL, 0, + lossless_ipool_size); + return 0; +} + +static int port_set_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer) +{ + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + u32 new_headroom_size = 0; + u32 current_headroom_size; + void *in; + int err; + int i; + + current_headroom_size = port_buffer->headroom_size; + + in = kzalloc(sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = mlx5e_port_query_pbmc(mdev, in); + if (err) + goto out; + + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { + void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + u64 size = port_buffer->buffer[i].size; + u64 xoff = port_buffer->buffer[i].xoff; + u64 xon = port_buffer->buffer[i].xon; + + new_headroom_size += size; + do_div(size, port_buff_cell_sz); + do_div(xoff, port_buff_cell_sz); + do_div(xon, port_buff_cell_sz); + MLX5_SET(bufferx_reg, buffer, size, size); + MLX5_SET(bufferx_reg, buffer, lossy, port_buffer->buffer[i].lossy); + MLX5_SET(bufferx_reg, buffer, xoff_threshold, xoff); + MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); + } + + new_headroom_size /= port_buff_cell_sz; + current_headroom_size /= port_buff_cell_sz; + err = port_update_shared_buffer(priv->mdev, current_headroom_size, + new_headroom_size); + if (err) + goto out; + + err = port_update_pool_cfg(priv->mdev, port_buffer); + if (err) + goto out; + + err = mlx5e_port_set_pbmc(mdev, in); +out: + kfree(in); + return err; +} + +/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) + * minimum speed value is 40Gbps + */ +static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) +{ + u32 speed; + u32 xoff; + int err; + + err = mlx5e_port_linkspeed(priv->mdev, &speed); + if (err) + speed = SPEED_40000; + speed = max_t(u32, speed, SPEED_40000); + + xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; + + netdev_dbg(priv->netdev, "%s: xoff=%d\n", __func__, xoff); + return xoff; +} + +static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, + u32 xoff, unsigned int max_mtu, u16 port_buff_cell_sz) +{ + int i; + + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { + if (port_buffer->buffer[i].lossy) { + port_buffer->buffer[i].xoff = 0; + port_buffer->buffer[i].xon = 0; + continue; + } + + if (port_buffer->buffer[i].size < + (xoff + max_mtu + port_buff_cell_sz)) { + pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n", + i, port_buffer->buffer[i].size); + return -ENOMEM; + } + + port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; + port_buffer->buffer[i].xon = + port_buffer->buffer[i].xoff - max_mtu; + } + + return 0; +} + +/** + * update_buffer_lossy - Update buffer configuration based on pfc + * @mdev: port function core device + * @max_mtu: netdev's max_mtu + * @pfc_en: current pfc configuration + * @buffer: current prio to buffer mapping + * @xoff: xoff value + * @port_buff_cell_sz: port buffer cell_size + * @port_buffer: port receive buffer configuration + * @change: + * + * Update buffer configuration based on pfc configuration and + * priority to buffer mapping. + * Buffer's lossy bit is changed to: + * lossless if there is at least one PFC enabled priority + * mapped to this buffer lossy if all priorities mapped to + * this buffer are PFC disabled + * + * @return: 0 if no error, + * sets change to true if buffer configuration was modified. + */ +static int update_buffer_lossy(struct mlx5_core_dev *mdev, + unsigned int max_mtu, + u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz, + struct mlx5e_port_buffer *port_buffer, + bool *change) +{ + bool changed = false; + u8 lossy_count; + u8 prio_count; + u8 lossy; + int prio; + int err; + int i; + + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { + prio_count = 0; + lossy_count = 0; + + for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) { + if (buffer[prio] != i) + continue; + + prio_count++; + lossy_count += !(pfc_en & (1 << prio)); + } + + if (lossy_count == prio_count) + lossy = 1; + else /* lossy_count < prio_count */ + lossy = 0; + + if (lossy != port_buffer->buffer[i].lossy) { + port_buffer->buffer[i].lossy = lossy; + changed = true; + } + } + + if (changed) { + err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); + if (err) + return err; + + err = port_update_pool_cfg(mdev, port_buffer); + if (err) + return err; + + *change = true; + } + + return 0; +} + +static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en) +{ + u32 g_rx_pause, g_tx_pause; + int err; + + err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause); + if (err) + return err; + + /* If global pause enabled, set all active buffers to lossless. + * Otherwise, check PFC setting. + */ + if (g_rx_pause || g_tx_pause) + *pfc_en = 0xff; + else + err = mlx5_query_port_pfc(mdev, pfc_en, NULL); + + return err; +} + +#define MINIMUM_MAX_MTU 9216 +int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + struct ieee_pfc *pfc, + u32 *buffer_size, + u8 *prio2buffer) +{ + u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; + struct net_device *netdev = priv->netdev; + struct mlx5e_port_buffer port_buffer; + u32 xoff = calculate_xoff(priv, mtu); + bool update_prio2buffer = false; + u8 buffer[MLX5E_MAX_PRIORITY]; + bool update_buffer = false; + unsigned int max_mtu; + u32 total_used = 0; + u8 curr_pfc_en; + int err; + int i; + + netdev_dbg(netdev, "%s: change=%x\n", __func__, change); + max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_PFC) { + netdev_dbg(netdev, "%s: requested PFC per priority bitmask: 0x%x\n", + __func__, pfc->pfc_en); + err = mlx5e_port_query_priority2buffer(priv->mdev, buffer); + if (err) + return err; + + err = update_buffer_lossy(priv->mdev, max_mtu, pfc->pfc_en, buffer, xoff, + port_buff_cell_sz, &port_buffer, + &update_buffer); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { + update_prio2buffer = true; + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) + netdev_dbg(priv->netdev, "%s: requested to map prio[%d] to buffer %d\n", + __func__, i, prio2buffer[i]); + + err = fill_pfc_en(priv->mdev, &curr_pfc_en); + if (err) + return err; + + err = update_buffer_lossy(priv->mdev, max_mtu, curr_pfc_en, prio2buffer, xoff, + port_buff_cell_sz, &port_buffer, &update_buffer); + if (err) + return err; + } + + if (change & MLX5E_PORT_BUFFER_SIZE) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { + netdev_dbg(priv->netdev, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); + if (!port_buffer.buffer[i].lossy && !buffer_size[i]) { + netdev_dbg(priv->netdev, "%s: lossless buffer[%d] size cannot be zero\n", + __func__, i); + return -EINVAL; + } + + port_buffer.buffer[i].size = buffer_size[i]; + total_used += buffer_size[i]; + } + + netdev_dbg(priv->netdev, "%s: total buffer requested=%d\n", __func__, total_used); + + if (total_used > port_buffer.headroom_size && + (total_used - port_buffer.headroom_size) > + port_buffer.spare_buffer_size) + return -EINVAL; + + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); + if (err) + return err; + } + + /* Need to update buffer configuration if xoff value is changed */ + if (!update_buffer && xoff != priv->dcbx.xoff) { + update_buffer = true; + err = update_xoff_threshold(&port_buffer, xoff, max_mtu, port_buff_cell_sz); + if (err) + return err; + } + priv->dcbx.xoff = xoff; + + /* Apply the settings */ + if (update_buffer) { + err = port_set_buffer(priv, &port_buffer); + if (err) + return err; + } + + if (update_prio2buffer) + err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h new file mode 100644 index 0000000000..f4a19ffbb6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_PORT_BUFFER_H__ +#define __MLX5_EN_PORT_BUFFER_H__ + +#include "en.h" +#include "port.h" + +#define MLX5E_MAX_NETWORK_BUFFER 8 +#define MLX5E_TOTAL_BUFFERS 10 +#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */ + +#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \ + MLX5_CAP_PCAM_REG(mdev, pbmc) && \ + MLX5_CAP_PCAM_REG(mdev, pptb)) + +enum { + MLX5E_PORT_BUFFER_CABLE_LEN = BIT(0), + MLX5E_PORT_BUFFER_PFC = BIT(1), + MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2), + MLX5E_PORT_BUFFER_SIZE = BIT(3), +}; + +struct mlx5e_bufferx_reg { + u8 lossy; + u8 epsb; + u32 size; + u32 xoff; + u32 xon; +}; + +struct mlx5e_port_buffer { + u32 port_buffer_size; + u32 spare_buffer_size; + u32 headroom_size; /* Buffers 0-7 */ + u32 internal_buffers_size; /* Buffers 8-9 */ + struct mlx5e_bufferx_reg buffer[MLX5E_MAX_NETWORK_BUFFER]; +}; + +int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, + u32 change, unsigned int mtu, + struct ieee_pfc *pfc, + u32 *buffer_size, + u8 *prio2buffer); + +int mlx5e_port_query_buffer(struct mlx5e_priv *priv, + struct mlx5e_port_buffer *port_buffer); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c new file mode 100644 index 0000000000..803035d4e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -0,0 +1,1012 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies + +#include "en/ptp.h" +#include "en/health.h" +#include "en/txrx.h" +#include "en/params.h" +#include "en/fs_tt_redirect.h" +#include +#include + +struct mlx5e_ptp_fs { + struct mlx5_flow_handle *l2_rule; + struct mlx5_flow_handle *udp_v4_rule; + struct mlx5_flow_handle *udp_v6_rule; + bool valid; +}; + +struct mlx5e_ptp_params { + struct mlx5e_params params; + struct mlx5e_sq_param txq_sq_param; + struct mlx5e_rq_param rq_param; +}; + +struct mlx5e_ptp_port_ts_cqe_tracker { + u8 metadata_id; + bool inuse : 1; + struct list_head entry; +}; + +struct mlx5e_ptp_port_ts_cqe_list { + struct mlx5e_ptp_port_ts_cqe_tracker *nodes; + struct list_head tracker_list_head; + /* Sync list operations in xmit and napi_poll contexts */ + spinlock_t tracker_list_lock; +}; + +static inline void +mlx5e_ptp_port_ts_cqe_list_add(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metadata) +{ + struct mlx5e_ptp_port_ts_cqe_tracker *tracker = &list->nodes[metadata]; + + WARN_ON_ONCE(tracker->inuse); + tracker->inuse = true; + spin_lock(&list->tracker_list_lock); + list_add_tail(&tracker->entry, &list->tracker_list_head); + spin_unlock(&list->tracker_list_lock); +} + +static void +mlx5e_ptp_port_ts_cqe_list_remove(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metadata) +{ + struct mlx5e_ptp_port_ts_cqe_tracker *tracker = &list->nodes[metadata]; + + WARN_ON_ONCE(!tracker->inuse); + tracker->inuse = false; + spin_lock(&list->tracker_list_lock); + list_del(&tracker->entry); + spin_unlock(&list->tracker_list_lock); +} + +void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata) +{ + mlx5e_ptp_port_ts_cqe_list_add(ptpsq->ts_cqe_pending_list, metadata); +} + +struct mlx5e_skb_cb_hwtstamp { + ktime_t cqe_hwtstamp; + ktime_t port_hwtstamp; +}; + +void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb) +{ + memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp)); +} + +static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct mlx5e_skb_cb_hwtstamp) > sizeof(skb->cb)); + return (struct mlx5e_skb_cb_hwtstamp *)skb->cb; +} + +static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb, + struct mlx5e_ptp_cq_stats *cq_stats) +{ + struct skb_shared_hwtstamps hwts = {}; + ktime_t diff; + + diff = abs(mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp - + mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp); + + /* Maximal allowed diff is 1 / 128 second */ + if (diff > (NSEC_PER_SEC >> 7)) { + cq_stats->abort++; + cq_stats->abort_abs_diff_ns += diff; + return; + } + + hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp; + skb_tstamp_tx(skb, &hwts); +} + +void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type, + ktime_t hwtstamp, + struct mlx5e_ptp_cq_stats *cq_stats) +{ + switch (hwtstamp_type) { + case (MLX5E_SKB_CB_CQE_HWTSTAMP): + mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp = hwtstamp; + break; + case (MLX5E_SKB_CB_PORT_HWTSTAMP): + mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp = hwtstamp; + break; + } + + /* If both CQEs arrive, check and report the port tstamp, and clear skb cb as + * skb soon to be released. + */ + if (!mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp || + !mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp) + return; + + mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats); + memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp)); +} + +static struct sk_buff * +mlx5e_ptp_metadata_map_lookup(struct mlx5e_ptp_metadata_map *map, u16 metadata) +{ + return map->data[metadata]; +} + +static struct sk_buff * +mlx5e_ptp_metadata_map_remove(struct mlx5e_ptp_metadata_map *map, u16 metadata) +{ + struct sk_buff *skb; + + skb = map->data[metadata]; + map->data[metadata] = NULL; + + return skb; +} + +static bool mlx5e_ptp_metadata_map_unhealthy(struct mlx5e_ptp_metadata_map *map) +{ + /* Considered beginning unhealthy state if size * 15 / 2^4 cannot be reclaimed. */ + return map->undelivered_counter > (map->capacity >> 4) * 15; +} + +static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, + ktime_t port_tstamp) +{ + struct mlx5e_ptp_port_ts_cqe_list *cqe_list = ptpsq->ts_cqe_pending_list; + ktime_t timeout = ns_to_ktime(MLX5E_PTP_TS_CQE_UNDELIVERED_TIMEOUT); + struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map; + struct mlx5e_ptp_port_ts_cqe_tracker *pos, *n; + + spin_lock(&cqe_list->tracker_list_lock); + list_for_each_entry_safe(pos, n, &cqe_list->tracker_list_head, entry) { + struct sk_buff *skb = + mlx5e_ptp_metadata_map_lookup(metadata_map, pos->metadata_id); + ktime_t dma_tstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp; + + if (!dma_tstamp || + ktime_after(ktime_add(dma_tstamp, timeout), port_tstamp)) + break; + + metadata_map->undelivered_counter++; + WARN_ON_ONCE(!pos->inuse); + pos->inuse = false; + list_del(&pos->entry); + } + spin_unlock(&cqe_list->tracker_list_lock); +} + +#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) + +static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, + struct mlx5_cqe64 *cqe, + u8 *md_buff, + u8 *md_buff_sz, + int budget) +{ + struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list; + u8 metadata_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter)); + bool is_err_cqe = !!MLX5E_RX_ERR_CQE(cqe); + struct mlx5e_txqsq *sq = &ptpsq->txqsq; + struct sk_buff *skb; + ktime_t hwtstamp; + + if (likely(pending_cqe_list->nodes[metadata_id].inuse)) { + mlx5e_ptp_port_ts_cqe_list_remove(pending_cqe_list, metadata_id); + } else { + /* Reclaim space in the unlikely event CQE was delivered after + * marking it late. + */ + ptpsq->metadata_map.undelivered_counter--; + ptpsq->cq_stats->late_cqe++; + } + + skb = mlx5e_ptp_metadata_map_remove(&ptpsq->metadata_map, metadata_id); + + if (unlikely(is_err_cqe)) { + ptpsq->cq_stats->err_cqe++; + goto out; + } + + hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe)); + mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP, + hwtstamp, ptpsq->cq_stats); + ptpsq->cq_stats->cqe++; + + mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp); +out: + napi_consume_skb(skb, budget); + md_buff[(*md_buff_sz)++] = metadata_id; + if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) && + !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work); +} + +static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int napi_budget) +{ + struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq); + int budget = min(napi_budget, MLX5E_TX_CQ_POLL_BUDGET); + u8 metadata_buff[MLX5E_TX_CQ_POLL_BUDGET]; + u8 metadata_buff_sz = 0; + struct mlx5_cqwq *cqwq; + struct mlx5_cqe64 *cqe; + int work_done = 0; + + cqwq = &cq->wq; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state))) + return false; + + cqe = mlx5_cqwq_get_cqe(cqwq); + if (!cqe) + return false; + + do { + mlx5_cqwq_pop(cqwq); + + mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, + metadata_buff, &metadata_buff_sz, napi_budget); + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); + + mlx5_cqwq_update_db_record(cqwq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + while (metadata_buff_sz > 0) + mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, + metadata_buff[--metadata_buff_sz]); + + mlx5e_txqsq_wake(&ptpsq->txqsq); + + return work_done == budget; +} + +static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi); + struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_rq *rq = &c->rq; + bool busy = false; + int work_done = 0; + int i; + + rcu_read_lock(); + + ch_stats->poll++; + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (i = 0; i < c->num_tc; i++) { + busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget); + busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget); + } + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state) && likely(budget)) { + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + busy |= work_done == budget; + busy |= INDIRECT_CALL_2(rq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + rq); + } + + if (busy) { + work_done = budget; + goto out; + } + + if (unlikely(!napi_complete_done(napi, work_done))) + goto out; + + ch_stats->arm++; + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (i = 0; i < c->num_tc; i++) { + mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq); + mlx5e_cq_arm(&c->ptpsq[i].ts_cq); + } + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_cq_arm(&rq->cq); + +out: + rcu_read_unlock(); + + return work_done; +} + +static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, + struct mlx5e_ptpsq *ptpsq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + int node; + + sq->pdev = c->pdev; + sq->clock = &mdev->clock; + sq->mkey_be = c->mkey_be; + sq->netdev = c->netdev; + sq->priv = c->priv; + sq->mdev = mdev; + sq->ch_ix = MLX5E_PTP_CHANNEL_IX; + sq->txq_ix = txq_ix; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->stats = &c->priv->ptp_stats.sq[tc]; + sq->ptpsq = ptpsq; + INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); + sq->stop_room = param->stop_room; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); + + node = dev_to_node(mlx5_core_dma_dev(mdev)); + + param->wq.db_numa_node = node; + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_txqsq_db(sq, node); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) +{ + mlx5_core_destroy_sq(mdev, sqn); +} + +static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa) +{ + struct mlx5e_ptp_metadata_fifo *metadata_freelist = &ptpsq->metadata_freelist; + struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map; + struct mlx5e_ptp_port_ts_cqe_list *cqe_list; + int db_sz; + int md; + + cqe_list = kvzalloc_node(sizeof(*ptpsq->ts_cqe_pending_list), GFP_KERNEL, numa); + if (!cqe_list) + return -ENOMEM; + ptpsq->ts_cqe_pending_list = cqe_list; + + db_sz = min_t(u32, mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq), + 1 << MLX5_CAP_GEN_2(ptpsq->txqsq.mdev, + ts_cqe_metadata_size2wqe_counter)); + ptpsq->ts_cqe_ctr_mask = db_sz - 1; + + cqe_list->nodes = kvzalloc_node(array_size(db_sz, sizeof(*cqe_list->nodes)), + GFP_KERNEL, numa); + if (!cqe_list->nodes) + goto free_cqe_list; + INIT_LIST_HEAD(&cqe_list->tracker_list_head); + spin_lock_init(&cqe_list->tracker_list_lock); + + metadata_freelist->data = + kvzalloc_node(array_size(db_sz, sizeof(*metadata_freelist->data)), + GFP_KERNEL, numa); + if (!metadata_freelist->data) + goto free_cqe_list_nodes; + metadata_freelist->mask = ptpsq->ts_cqe_ctr_mask; + + for (md = 0; md < db_sz; ++md) { + cqe_list->nodes[md].metadata_id = md; + metadata_freelist->data[md] = md; + } + metadata_freelist->pc = db_sz; + + metadata_map->data = + kvzalloc_node(array_size(db_sz, sizeof(*metadata_map->data)), + GFP_KERNEL, numa); + if (!metadata_map->data) + goto free_metadata_freelist; + metadata_map->capacity = db_sz; + + return 0; + +free_metadata_freelist: + kvfree(metadata_freelist->data); +free_cqe_list_nodes: + kvfree(cqe_list->nodes); +free_cqe_list: + kvfree(cqe_list); + return -ENOMEM; +} + +static void mlx5e_ptp_drain_metadata_map(struct mlx5e_ptp_metadata_map *map) +{ + int idx; + + for (idx = 0; idx < map->capacity; ++idx) { + struct sk_buff *skb = map->data[idx]; + + dev_kfree_skb_any(skb); + } +} + +static void mlx5e_ptp_free_traffic_db(struct mlx5e_ptpsq *ptpsq) +{ + mlx5e_ptp_drain_metadata_map(&ptpsq->metadata_map); + kvfree(ptpsq->metadata_map.data); + kvfree(ptpsq->metadata_freelist.data); + kvfree(ptpsq->ts_cqe_pending_list->nodes); + kvfree(ptpsq->ts_cqe_pending_list); +} + +static void mlx5e_ptpsq_unhealthy_work(struct work_struct *work) +{ + struct mlx5e_ptpsq *ptpsq = + container_of(work, struct mlx5e_ptpsq, report_unhealthy_work); + + mlx5e_reporter_tx_ptpsq_unhealthy(ptpsq); +} + +static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, + int txq_ix, struct mlx5e_ptp_params *cparams, + int tc, struct mlx5e_ptpsq *ptpsq) +{ + struct mlx5e_sq_param *sqp = &cparams->txq_sq_param; + struct mlx5e_txqsq *txqsq = &ptpsq->txqsq; + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_ptp_alloc_txqsq(c, txq_ix, &cparams->params, sqp, + txqsq, tc, ptpsq); + if (err) + return err; + + csp.tisn = tisn; + csp.tis_lst_sz = 1; + csp.cqn = txqsq->cq.mcq.cqn; + csp.wq_ctrl = &txqsq->wq_ctrl; + csp.min_inline_mode = txqsq->min_inline_mode; + csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; + + err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn); + if (err) + goto err_free_txqsq; + + err = mlx5e_ptp_alloc_traffic_db(ptpsq, dev_to_node(mlx5_core_dma_dev(c->mdev))); + if (err) + goto err_free_txqsq; + + INIT_WORK(&ptpsq->report_unhealthy_work, mlx5e_ptpsq_unhealthy_work); + + return 0; + +err_free_txqsq: + mlx5e_free_txqsq(txqsq); + + return err; +} + +static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq) +{ + struct mlx5e_txqsq *sq = &ptpsq->txqsq; + struct mlx5_core_dev *mdev = sq->mdev; + + if (current_work() != &ptpsq->report_unhealthy_work) + cancel_work_sync(&ptpsq->report_unhealthy_work); + mlx5e_ptp_free_traffic_db(ptpsq); + cancel_work_sync(&sq->recover_work); + mlx5e_ptp_destroy_sq(mdev, sq->sqn); + mlx5e_free_txqsq_descs(sq); + mlx5e_free_txqsq(sq); +} + +static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + struct mlx5e_params *params = &cparams->params; + u8 num_tc = mlx5e_get_dcb_num_tc(params); + int ix_base; + int err; + int tc; + + ix_base = num_tc * params->num_channels; + + for (tc = 0; tc < num_tc; tc++) { + int txq_ix = ix_base + tc; + + err = mlx5e_ptp_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, + cparams, tc, &c->ptpsq[tc]); + if (err) + goto close_txqsq; + } + + return 0; + +close_txqsq: + for (--tc; tc >= 0; tc--) + mlx5e_ptp_close_txqsq(&c->ptpsq[tc]); + + return err; +} + +static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_ptp_close_txqsq(&c->ptpsq[tc]); +} + +static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + struct mlx5e_params *params = &cparams->params; + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder ptp_moder = {}; + struct mlx5e_cq_param *cq_param; + u8 num_tc; + int err; + int tc; + + num_tc = mlx5e_get_dcb_num_tc(params); + + ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); + ccp.ch_stats = c->stats; + ccp.napi = &c->napi; + ccp.ix = MLX5E_PTP_CHANNEL_IX; + + cq_param = &cparams->txq_sq_param.cqp; + + for (tc = 0; tc < num_tc; tc++) { + struct mlx5e_cq *cq = &c->ptpsq[tc].txqsq.cq; + + err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); + if (err) + goto out_err_txqsq_cq; + } + + for (tc = 0; tc < num_tc; tc++) { + struct mlx5e_cq *cq = &c->ptpsq[tc].ts_cq; + struct mlx5e_ptpsq *ptpsq = &c->ptpsq[tc]; + + err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); + if (err) + goto out_err_ts_cq; + + ptpsq->cq_stats = &c->priv->ptp_stats.cq[tc]; + } + + return 0; + +out_err_ts_cq: + for (--tc; tc >= 0; tc--) + mlx5e_close_cq(&c->ptpsq[tc].ts_cq); + tc = num_tc; +out_err_txqsq_cq: + for (--tc; tc >= 0; tc--) + mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq); + + return err; +} + +static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder ptp_moder = {}; + struct mlx5e_cq_param *cq_param; + struct mlx5e_cq *cq = &c->rq.cq; + + ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); + ccp.ch_stats = c->stats; + ccp.napi = &c->napi; + ccp.ix = MLX5E_PTP_CHANNEL_IX; + + cq_param = &cparams->rq_param.cqp; + + return mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); +} + +static void mlx5e_ptp_close_tx_cqs(struct mlx5e_ptp *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->ptpsq[tc].ts_cq); + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq); +} + +static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq; + + mlx5e_build_sq_param_common(mdev, param); + + wq = MLX5_ADDR_OF(sqc, sqc, wq); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->stop_room = mlx5e_stop_room_for_max_wqe(mdev); + mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); +} + +static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev, + struct net_device *netdev, + u16 q_counter, + struct mlx5e_ptp_params *ptp_params) +{ + struct mlx5e_rq_param *rq_params = &ptp_params->rq_param; + struct mlx5e_params *params = &ptp_params->params; + + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; + mlx5e_init_rq_type_params(mdev, params); + params->sw_mtu = netdev->max_mtu; + mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params); +} + +static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams, + struct mlx5e_params *orig) +{ + struct mlx5e_params *params = &cparams->params; + + params->tx_min_inline_mode = orig->tx_min_inline_mode; + params->num_channels = orig->num_channels; + params->hard_mtu = orig->hard_mtu; + params->sw_mtu = orig->sw_mtu; + params->mqprio = orig->mqprio; + + /* SQ */ + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + params->log_sq_size = + min(MLX5_CAP_GEN_2(c->mdev, ts_cqe_metadata_size2wqe_counter), + MLX5E_PTP_MAX_LOG_SQ_SIZE); + params->log_sq_size = min(params->log_sq_size, orig->log_sq_size); + mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); + } + /* RQ */ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + params->vlan_strip_disable = orig->vlan_strip_disable; + mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); + } +} + +static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5e_priv *priv = c->priv; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = priv->netdev; + rq->priv = priv; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &c->priv->ptp_stats.rq; + rq->ix = MLX5E_PTP_CHANNEL_IX; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, false); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); +} + +static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int node = dev_to_node(c->mdev->device); + int err; + + err = mlx5e_init_ptp_rq(c, params, &c->rq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq); +} + +static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + int err; + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + err = mlx5e_ptp_open_tx_cqs(c, cparams); + if (err) + return err; + + err = mlx5e_ptp_open_txqsqs(c, cparams); + if (err) + goto close_tx_cqs; + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + err = mlx5e_ptp_open_rx_cq(c, cparams); + if (err) + goto close_txqsq; + + err = mlx5e_ptp_open_rq(c, &cparams->params, &cparams->rq_param); + if (err) + goto close_rx_cq; + } + return 0; + +close_rx_cq: + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_close_cq(&c->rq.cq); +close_txqsq: + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) + mlx5e_ptp_close_txqsqs(c); +close_tx_cqs: + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) + mlx5e_ptp_close_tx_cqs(c); + + return err; +} + +static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c) +{ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + mlx5e_close_rq(&c->rq); + mlx5e_close_cq(&c->rq.cq); + } + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + mlx5e_ptp_close_txqsqs(c); + mlx5e_ptp_close_tx_cqs(c); + } +} + +static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params) +{ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS)) + __set_bit(MLX5E_PTP_STATE_TX, c->state); + + if (params->ptp_rx) + __set_bit(MLX5E_PTP_STATE_RX, c->state); + + return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0; +} + +static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs); + + if (!ptp_fs->valid) + return; + + mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule); + mlx5e_fs_tt_redirect_any_destroy(fs); + + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); + mlx5e_fs_tt_redirect_udp_destroy(fs); + ptp_fs->valid = false; +} + +static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) +{ + u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res); + struct mlx5e_flow_steering *fs = priv->fs; + struct mlx5_flow_handle *rule; + struct mlx5e_ptp_fs *ptp_fs; + int err; + + ptp_fs = mlx5e_fs_get_ptp(fs); + if (ptp_fs->valid) + return 0; + + err = mlx5e_fs_tt_redirect_udp_create(fs); + if (err) + goto out_free; + + rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP, + tirn, PTP_EV_PORT); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_fs_udp; + } + ptp_fs->udp_v4_rule = rule; + + rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP, + tirn, PTP_EV_PORT); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_udp_v4_rule; + } + ptp_fs->udp_v6_rule = rule; + + err = mlx5e_fs_tt_redirect_any_create(fs); + if (err) + goto out_destroy_udp_v6_rule; + + rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_fs_any; + } + ptp_fs->l2_rule = rule; + ptp_fs->valid = true; + + return 0; + +out_destroy_fs_any: + mlx5e_fs_tt_redirect_any_destroy(fs); +out_destroy_udp_v6_rule: + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); +out_destroy_udp_v4_rule: + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); +out_destroy_fs_udp: + mlx5e_fs_tt_redirect_udp_destroy(fs); +out_free: + return err; +} + +int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_ptp_params *cparams; + struct mlx5e_ptp *c; + int err; + + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev))); + cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); + if (!c || !cparams) { + err = -ENOMEM; + goto err_free; + } + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->pdev = mlx5_core_dma_dev(priv->mdev); + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); + c->num_tc = mlx5e_get_dcb_num_tc(params); + c->stats = &priv->ptp_stats.ch; + c->lag_port = lag_port; + + err = mlx5e_ptp_set_state(c, params); + if (err) + goto err_free; + + netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll); + + mlx5e_ptp_build_params(c, cparams, params); + + err = mlx5e_ptp_open_queues(c, cparams); + if (unlikely(err)) + goto err_napi_del; + + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + priv->rx_ptp_opened = true; + + *cp = c; + + kvfree(cparams); + + return 0; + +err_napi_del: + netif_napi_del(&c->napi); +err_free: + kvfree(cparams); + kvfree(c); + return err; +} + +void mlx5e_ptp_close(struct mlx5e_ptp *c) +{ + mlx5e_ptp_close_queues(c); + netif_napi_del(&c->napi); + + kvfree(c); +} + +void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) +{ + int tc; + + napi_enable(&c->napi); + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq); + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + mlx5e_ptp_rx_set_fs(c->priv); + mlx5e_activate_rq(&c->rq); + } + mlx5e_trigger_napi_sched(&c->napi); +} + +void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c) +{ + int tc; + + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_deactivate_rq(&c->rq); + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); + } + + napi_disable(&c->napi); +} + +int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn) +{ + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return -EINVAL; + + *rqn = c->rq.rqn; + return 0; +} + +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile) +{ + struct mlx5e_ptp_fs *ptp_fs; + + if (!mlx5e_profile_feature_cap(profile, PTP_RX)) + return 0; + + ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL); + if (!ptp_fs) + return -ENOMEM; + mlx5e_fs_set_ptp(fs, ptp_fs); + + return 0; +} + +void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile) +{ + struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs); + + if (!mlx5e_profile_feature_cap(profile, PTP_RX)) + return; + + mlx5e_ptp_rx_unset_fs(fs); + kfree(ptp_fs); +} + +int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) +{ + struct mlx5e_ptp *c = priv->channels.ptp; + + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) + return 0; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + if (set) { + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) { + netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules"); + return -EINVAL; + } + return mlx5e_ptp_rx_set_fs(priv); + } + /* set == false */ + if (c && test_bit(MLX5E_PTP_STATE_RX, c->state)) { + netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules"); + return -EINVAL; + } + mlx5e_ptp_rx_unset_fs(priv->fs); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h new file mode 100644 index 0000000000..7b700d0f95 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __MLX5_EN_PTP_H__ +#define __MLX5_EN_PTP_H__ + +#include "en.h" +#include "en_stats.h" +#include "en/txrx.h" +#include +#include +#include +#include + +#define MLX5E_PTP_CHANNEL_IX 0 +#define MLX5E_PTP_MAX_LOG_SQ_SIZE (8U) +#define MLX5E_PTP_TS_CQE_UNDELIVERED_TIMEOUT (1 * NSEC_PER_SEC) + +struct mlx5e_ptp_metadata_fifo { + u8 cc; + u8 pc; + u8 mask; + u8 *data; +}; + +struct mlx5e_ptp_metadata_map { + u16 undelivered_counter; + u16 capacity; + struct sk_buff **data; +}; + +struct mlx5e_ptpsq { + struct mlx5e_txqsq txqsq; + struct mlx5e_cq ts_cq; + struct mlx5e_ptp_cq_stats *cq_stats; + u16 ts_cqe_ctr_mask; + + struct work_struct report_unhealthy_work; + struct mlx5e_ptp_port_ts_cqe_list *ts_cqe_pending_list; + struct mlx5e_ptp_metadata_fifo metadata_freelist; + struct mlx5e_ptp_metadata_map metadata_map; +}; + +enum { + MLX5E_PTP_STATE_TX, + MLX5E_PTP_STATE_RX, + MLX5E_PTP_STATE_NUM_STATES, +}; + +struct mlx5e_ptp { + /* data path */ + struct mlx5e_ptpsq ptpsq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq rq; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + u8 num_tc; + u8 lag_port; + + /* data path - accessed per napi poll */ + struct mlx5e_ch_stats *stats; + + /* control */ + struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); +}; + +static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) +{ + struct flow_keys fk; + + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + return false; + + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return false; + + if (fk.basic.n_proto == htons(ETH_P_1588)) + return true; + + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) + return false; + + return (fk.basic.ip_proto == IPPROTO_UDP && + fk.ports.dst == htons(PTP_EV_PORT)); +} + +static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *fifo, u8 metadata) +{ + fifo->data[fifo->mask & fifo->pc++] = metadata; +} + +static inline u8 +mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo) +{ + return fifo->data[fifo->mask & fifo->cc++]; +} + +static inline void +mlx5e_ptp_metadata_map_put(struct mlx5e_ptp_metadata_map *map, + struct sk_buff *skb, u8 metadata) +{ + WARN_ON_ONCE(map->data[metadata]); + map->data[metadata] = skb; +} + +static inline bool mlx5e_ptpsq_metadata_freelist_empty(struct mlx5e_ptpsq *ptpsq) +{ + struct mlx5e_ptp_metadata_fifo *freelist; + + if (likely(!ptpsq)) + return false; + + freelist = &ptpsq->metadata_freelist; + + return freelist->pc == freelist->cc; +} + +int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp); +void mlx5e_ptp_close(struct mlx5e_ptp *c); +void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c); +void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c); +int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn); +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile); +void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs, + const struct mlx5e_profile *profile); +int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set); + +void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata); + +enum { + MLX5E_SKB_CB_CQE_HWTSTAMP = BIT(0), + MLX5E_SKB_CB_PORT_HWTSTAMP = BIT(1), +}; + +void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type, + ktime_t hwtstamp, + struct mlx5e_ptp_cq_stats *cq_stats); + +void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb); +#endif /* __MLX5_EN_PTP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c new file mode 100644 index 0000000000..244bc15a42 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ +#include + +#include +#include "en.h" +#include "params.h" +#include "../qos.h" +#include "en/htb.h" + +struct qos_sq_callback_params { + struct mlx5e_priv *priv; + struct mlx5e_channels *chs; +}; + +int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes) +{ + if (nbytes < BYTES_IN_MBIT) { + qos_warn(mdev, "Input rate (%llu Bytes/sec) below minimum supported (%u Bytes/sec)\n", + nbytes, BYTES_IN_MBIT); + return -EINVAL; + } + return 0; +} + +static u32 mlx5e_qos_bytes2mbits(struct mlx5_core_dev *mdev, u64 nbytes) +{ + return div_u64(nbytes, BYTES_IN_MBIT); +} + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev)); +} + +/* TX datapath API */ + +u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid) +{ + /* These channel params are safe to access from the datapath, because: + * 1. This function is called only after checking selq->htb_maj_id != 0, + * and the number of queues can't change while HTB offload is active. + * 2. When selq->htb_maj_id becomes 0, synchronize_rcu waits for + * mlx5e_select_queue to finish while holding priv->state_lock, + * preventing other code from changing the number of queues. + */ + bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS); + + return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid; +} + +/* SQ lifecycle */ + +static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_channel *c; + int ix; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + return mlx5e_state_dereference(priv, qos_sqs[qid]); +} + +int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + u16 node_qid, u32 hw_id) +{ + struct mlx5e_create_cq_param ccp = {}; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_sq_param param_sq; + struct mlx5e_cq_param param_cq; + int txq_ix, ix, qid, err = 0; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + + params = &chs->params; + + txq_ix = mlx5e_qid_from_qos(chs, node_qid); + + WARN_ON(node_qid > priv->htb_max_qos_sqs); + if (node_qid == priv->htb_max_qos_sqs) { + struct mlx5e_sq_stats *stats, **stats_list = NULL; + + if (priv->htb_max_qos_sqs == 0) { + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), + GFP_KERNEL); + if (!stats_list) + return -ENOMEM; + } + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) { + kvfree(stats_list); + return -ENOMEM; + } + if (stats_list) + WRITE_ONCE(priv->htb_qos_sq_stats, stats_list); + WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats); + /* Order htb_max_qos_sqs increment after writing the array pointer. + * Pairs with smp_load_acquire in en_stats.c. + */ + smp_store_release(&priv->htb_max_qos_sqs, priv->htb_max_qos_sqs + 1); + } + + ix = node_qid % params->num_channels; + qid = node_qid / params->num_channels; + c = chs->c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = kzalloc(sizeof(*sq), GFP_KERNEL); + + if (!sq) + return -ENOMEM; + + mlx5e_build_create_cq_param(&ccp, c); + + memset(¶m_sq, 0, sizeof(param_sq)); + memset(¶m_cq, 0, sizeof(param_cq)); + mlx5e_build_sq_param(priv->mdev, params, ¶m_sq); + mlx5e_build_tx_cq_param(priv->mdev, params, ¶m_cq); + err = mlx5e_open_cq(priv, params->tx_cq_moderation, ¶m_cq, &ccp, &sq->cq); + if (err) + goto err_free_sq; + err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params, + ¶m_sq, sq, 0, hw_id, + priv->htb_qos_sq_stats[node_qid]); + if (err) + goto err_close_cq; + + rcu_assign_pointer(qos_sqs[qid], sq); + + return 0; + +err_close_cq: + mlx5e_close_cq(&sq->cq); +err_free_sq: + kfree(sq); + return err; +} + +static int mlx5e_open_qos_sq_cb_wrapper(void *data, u16 node_qid, u32 hw_id) +{ + struct qos_sq_callback_params *cb_params = data; + + return mlx5e_open_qos_sq(cb_params->priv, cb_params->chs, node_qid, hw_id); +} + +int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id) +{ + struct mlx5e_priv *priv = data; + struct mlx5e_txqsq *sq; + u16 qid; + + sq = mlx5e_get_qos_sq(priv, node_qid); + + qid = mlx5e_qid_from_qos(&priv->channels, node_qid); + + /* If it's a new queue, it will be marked as started at this point. + * Stop it before updating txq2sq. + */ + mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid)); + + priv->txq2sq[qid] = sq; + + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); + + qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid); + mlx5e_activate_txqsq(sq); + + return 0; +} + +void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, qid); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; + + /* Make the change to txq2sq visible before the queue is started again. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); +} + +void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int ix; + + params = &priv->channels.params; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock)); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + synchronize_rcu(); /* Sync with NAPI. */ + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); +} + +void mlx5e_qos_close_queues(struct mlx5e_channel *c) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock)); + if (!qos_sqs) + return; + synchronize_rcu(); /* Sync with NAPI. */ + + for (i = 0; i < c->qos_sqs_size; i++) { + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); + } + + kvfree(qos_sqs); +} + +void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_close_queues(chs->c[i]); +} + +int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + u16 qos_sqs_size; + int i; + + qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num); + + for (i = 0; i < chs->num; i++) { + struct mlx5e_txqsq **sqs; + + sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL); + if (!sqs) + goto err_free; + + WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size); + smp_wmb(); /* Pairs with mlx5e_napi_poll. */ + rcu_assign_pointer(chs->c[i]->qos_sqs, sqs); + } + + return 0; + +err_free: + while (--i >= 0) { + struct mlx5e_txqsq **sqs; + + sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL, + lockdep_is_held(&priv->state_lock)); + + synchronize_rcu(); /* Sync with NAPI. */ + kvfree(sqs); + } + return -ENOMEM; +} + +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + struct qos_sq_callback_params callback_params; + int err; + + err = mlx5e_qos_alloc_queues(priv, chs); + if (err) + return err; + + callback_params.priv = priv; + callback_params.chs = chs; + + err = mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_open_qos_sq_cb_wrapper, &callback_params); + if (err) { + mlx5e_qos_close_all_queues(chs); + return err; + } + + return 0; +} + +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv) +{ + mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_activate_qos_sq, priv); +} + +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c) +{ + struct mlx5e_params *params = &c->priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs); + if (!qos_sqs) + return; + + for (i = 0; i < c->qos_sqs_size; i++) { + u16 qid = params->num_channels * i + c->ix; + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL; + } +} + +void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_deactivate_queues(chs->c[i]); +} + +void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq) +{ + qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid); + netdev_tx_reset_queue(txq); + netif_tx_start_queue(txq); +} + +void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) +{ + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (!qdisc) + return; + + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); +} + +int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_qopt) +{ + struct mlx5e_htb *htb = priv->htb; + int res; + + if (!htb && htb_qopt->command != TC_HTB_CREATE) + return -EINVAL; + + if (htb_qopt->prio || htb_qopt->quantum) { + NL_SET_ERR_MSG_MOD(htb_qopt->extack, + "prio and quantum parameters are not supported by device with HTB offload enabled."); + return -EOPNOTSUPP; + } + + switch (htb_qopt->command) { + case TC_HTB_CREATE: + if (!mlx5_qos_is_supported(priv->mdev)) { + NL_SET_ERR_MSG_MOD(htb_qopt->extack, + "Missing QoS capabilities. Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + priv->htb = mlx5e_htb_alloc(); + htb = priv->htb; + if (!htb) + return -ENOMEM; + res = mlx5e_htb_init(htb, htb_qopt, priv->netdev, priv->mdev, &priv->selq, priv); + if (res) { + mlx5e_htb_free(htb); + priv->htb = NULL; + } + return res; + case TC_HTB_DESTROY: + mlx5e_htb_cleanup(htb); + mlx5e_htb_free(htb); + priv->htb = NULL; + return 0; + case TC_HTB_LEAF_ALLOC_QUEUE: + res = mlx5e_htb_leaf_alloc_queue(htb, htb_qopt->classid, htb_qopt->parent_classid, + htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack); + if (res < 0) + return res; + htb_qopt->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return mlx5e_htb_leaf_to_inner(htb, htb_qopt->parent_classid, htb_qopt->classid, + htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack); + case TC_HTB_LEAF_DEL: + return mlx5e_htb_leaf_del(htb, &htb_qopt->classid, htb_qopt->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return mlx5e_htb_leaf_del_last(htb, htb_qopt->classid, + htb_qopt->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb_qopt->extack); + case TC_HTB_NODE_MODIFY: + return mlx5e_htb_node_modify(htb, htb_qopt->classid, htb_qopt->rate, htb_qopt->ceil, + htb_qopt->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = mlx5e_htb_get_txq_by_classid(htb, htb_qopt->classid); + if (res < 0) + return res; + htb_qopt->qid = res; + return 0; + default: + return -EOPNOTSUPP; + } +} + +struct mlx5e_mqprio_rl { + struct mlx5_core_dev *mdev; + u32 root_id; + u32 *leaves_id; + u8 num_tc; +}; + +struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_mqprio_rl), GFP_KERNEL); +} + +void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl) +{ + kvfree(rl); +} + +int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc, + u64 max_rate[]) +{ + int err; + int tc; + + if (!mlx5_qos_is_supported(mdev)) { + qos_warn(mdev, "Missing QoS capabilities. Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + if (num_tc > mlx5e_qos_max_leaf_nodes(mdev)) + return -EINVAL; + + rl->mdev = mdev; + rl->num_tc = num_tc; + rl->leaves_id = kvcalloc(num_tc, sizeof(*rl->leaves_id), GFP_KERNEL); + if (!rl->leaves_id) + return -ENOMEM; + + err = mlx5_qos_create_root_node(mdev, &rl->root_id); + if (err) + goto err_free_leaves; + + qos_dbg(mdev, "Root created, id %#x\n", rl->root_id); + + for (tc = 0; tc < num_tc; tc++) { + u32 max_average_bw; + + max_average_bw = mlx5e_qos_bytes2mbits(mdev, max_rate[tc]); + err = mlx5_qos_create_leaf_node(mdev, rl->root_id, 0, max_average_bw, + &rl->leaves_id[tc]); + if (err) + goto err_destroy_leaves; + + qos_dbg(mdev, "Leaf[%d] created, id %#x, max average bw %u Mbits/sec\n", + tc, rl->leaves_id[tc], max_average_bw); + } + return 0; + +err_destroy_leaves: + while (--tc >= 0) + mlx5_qos_destroy_node(mdev, rl->leaves_id[tc]); + mlx5_qos_destroy_node(mdev, rl->root_id); +err_free_leaves: + kvfree(rl->leaves_id); + return err; +} + +void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl) +{ + int tc; + + for (tc = 0; tc < rl->num_tc; tc++) + mlx5_qos_destroy_node(rl->mdev, rl->leaves_id[tc]); + mlx5_qos_destroy_node(rl->mdev, rl->root_id); + kvfree(rl->leaves_id); +} + +int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id) +{ + if (tc >= rl->num_tc) + return -EINVAL; + + *hw_id = rl->leaves_id[tc]; + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h new file mode 100644 index 0000000000..4947afa23b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_EN_QOS_H +#define __MLX5E_EN_QOS_H + +#include + +#define BYTES_IN_MBIT 125000 + +struct mlx5e_priv; +struct mlx5e_htb; +struct mlx5e_channels; +struct mlx5e_channel; +struct tc_htb_qopt_offload; + +int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes); +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); + +/* SQ lifecycle */ +int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + u16 node_qid, u32 hw_id); +int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id); +void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid); +void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid); +void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq); +void mlx5e_reset_qdisc(struct net_device *dev, u16 qid); + +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv); +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c); +void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs); +void mlx5e_qos_close_queues(struct mlx5e_channel *c); +void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs); +int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +/* TX datapath API */ +u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid); + +/* HTB API */ +int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb); + +/* MQPRIO TX rate limit */ +struct mlx5e_mqprio_rl; +struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_alloc(void); +void mlx5e_mqprio_rl_free(struct mlx5e_mqprio_rl *rl); +int mlx5e_mqprio_rl_init(struct mlx5e_mqprio_rl *rl, struct mlx5_core_dev *mdev, u8 num_tc, + u64 max_rate[]); +void mlx5e_mqprio_rl_cleanup(struct mlx5e_mqprio_rl *rl); +int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_id); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c new file mode 100644 index 0000000000..016a61c52c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include +#include +#include + +#include "mlx5_core.h" +#include "eswitch.h" +#include "esw/acl/ofld.h" +#include "en_rep.h" + +struct mlx5e_rep_bond { + struct notifier_block nb; + struct netdev_net_notifier nn; + struct list_head metadata_list; +}; + +struct mlx5e_rep_bond_slave_entry { + struct list_head list; + struct net_device *netdev; +}; + +struct mlx5e_rep_bond_metadata { + struct list_head list; /* link to global list of rep_bond_metadata */ + struct mlx5_eswitch *esw; + /* private of uplink holding rep bond metadata list */ + struct net_device *lag_dev; + u32 metadata_reg_c_0; + + struct list_head slaves_list; /* slaves list */ + int slaves; +}; + +static struct mlx5e_rep_bond_metadata * +mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv, + const struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_metadata *found = NULL; + struct mlx5e_rep_bond_metadata *cur; + + list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) { + if (cur->lag_dev == lag_dev) { + found = cur; + break; + } + } + + return found; +} + +static struct mlx5e_rep_bond_slave_entry * +mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata, + const struct net_device *netdev) +{ + struct mlx5e_rep_bond_slave_entry *found = NULL; + struct mlx5e_rep_bond_slave_entry *cur; + + list_for_each_entry(cur, &mdata->slaves_list, list) { + if (cur->netdev == netdev) { + found = cur; + break; + } + } + + return found; +} + +static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata) +{ + netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n", + mdata->metadata_reg_c_0); + list_del(&mdata->list); + mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0); + WARN_ON(!list_empty(&mdata->slaves_list)); + kfree(mdata); +} + +/* This must be called under rtnl_lock */ +int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, + struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_slave_entry *s_entry; + struct mlx5e_rep_bond_metadata *mdata; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + int err; + + ASSERT_RTNL(); + + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); + if (!mdata) { + /* First netdev becomes slave, no metadata presents the lag_dev. Create one */ + mdata = kzalloc(sizeof(*mdata), GFP_KERNEL); + if (!mdata) + return -ENOMEM; + + mdata->lag_dev = lag_dev; + mdata->esw = esw; + INIT_LIST_HEAD(&mdata->slaves_list); + mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw); + if (!mdata->metadata_reg_c_0) { + kfree(mdata); + return -ENOSPC; + } + list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list); + + netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n", + mdata->metadata_reg_c_0); + } + + s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL); + if (!s_entry) { + err = -ENOMEM; + goto entry_alloc_err; + } + + s_entry->netdev = netdev; + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, rpriv->rep->vport, + mdata->metadata_reg_c_0); + if (err) + goto ingress_err; + + mdata->slaves++; + list_add_tail(&s_entry->list, &mdata->slaves_list); + netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", + rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); + + return 0; + +ingress_err: + kfree(s_entry); +entry_alloc_err: + if (!mdata->slaves) + mlx5e_rep_bond_metadata_release(mdata); + return err; +} + +/* This must be called under rtnl_lock */ +void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, + const struct net_device *netdev, + const struct net_device *lag_dev) +{ + struct mlx5e_rep_bond_slave_entry *s_entry; + struct mlx5e_rep_bond_metadata *mdata; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + + ASSERT_RTNL(); + + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); + if (!mdata) + return; + + s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev); + if (!s_entry) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + + /* Reset bond_metadata to zero first then reset all ingress/egress + * acls and rx rules of unslave representor's vport + */ + mlx5_esw_acl_ingress_vport_metadata_update(esw, rpriv->rep->vport, 0); + mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport); + mlx5e_rep_bond_update(priv, false); + + list_del(&s_entry->list); + + netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", + rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); + + if (--mdata->slaves == 0) + mlx5e_rep_bond_metadata_release(mdata); + kfree(s_entry); +} + +static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) +{ + return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev); +} + +static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct netdev_lag_lower_state_info *lag_info; + struct mlx5e_rep_priv *rpriv; + struct net_device *lag_dev; + struct mlx5e_priv *priv; + struct list_head *iter; + struct net_device *dev; + u16 acl_vport_num; + u16 fwd_vport_num; + int err; + + info = ptr; + lag_info = info->lower_state_info; + /* This is not an event of a representor becoming active slave */ + if (!lag_info->tx_enabled) + return; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + fwd_vport_num = rpriv->rep->vport; + lag_dev = netdev_master_upper_dev_get(netdev); + if (!lag_dev) + return; + + netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n", + lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev)); + + /* Point everyone's egress acl to the vport of the active representor */ + netdev_for_each_lower_dev(lag_dev, dev, iter) { + priv = netdev_priv(dev); + rpriv = priv->ppriv; + acl_vport_num = rpriv->rep->vport; + if (acl_vport_num != fwd_vport_num) { + /* Only single rx_rule for unique bond_metadata should be + * present, delete it if it's saved as passive vport's + * rx_rule with destination as passive vport's root_ft + */ + mlx5e_rep_bond_update(priv, true); + err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch, + fwd_vport_num, + acl_vport_num); + if (err) + netdev_warn(dev, + "configure slave vport(%d) egress fwd, err(%d)", + acl_vport_num, err); + } + } + + /* Insert new rx_rule for unique bond_metadata, save it as active vport's + * rx_rule with new destination as active vport's root_ft + */ + err = mlx5e_rep_bond_update(netdev_priv(netdev), false); + if (err) + netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)", + fwd_vport_num, err); +} + +static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + struct mlx5e_rep_priv *rpriv; + struct net_device *lag_dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + lag_dev = info->upper_dev; + + netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n", + info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name); + + if (info->linking) + mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev); + else + mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev); +} + +/* Bond device of representors and netdev events are used here in specific way + * to support eswitch vports bonding and to perform failover of eswitch vport + * by modifying the vport's egress acl of lower dev representors. Thus this + * also change the traditional behavior of lower dev under bond device. + * All non-representor netdevs or representors of other vendors as lower dev + * of bond device are not supported. + */ +static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_bond *bond; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return NOTIFY_DONE; + + bond = container_of(nb, struct mlx5e_rep_bond, nb); + priv = netdev_priv(netdev); + rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); + /* Verify VF representor is on the same device of the bond handling the netevent. */ + if (rpriv->uplink_priv.bond != bond) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + mlx5e_rep_changelowerstate_event(netdev, ptr); + break; + case NETDEV_CHANGEUPPER: + mlx5e_rep_changeupper_event(netdev, ptr); + break; + } + return NOTIFY_DONE; +} + +/* If HW support eswitch vports bonding, register a specific notifier to + * handle it when two or more representors are bonded + */ +int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv; + int ret = 0; + + priv = netdev_priv(netdev); + if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch)) + goto out; + + uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL); + if (!uplink_priv->bond) { + ret = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&uplink_priv->bond->metadata_list); + uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent; + ret = register_netdevice_notifier_dev_net(netdev, + &uplink_priv->bond->nb, + &uplink_priv->bond->nn); + if (ret) { + netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret); + kvfree(uplink_priv->bond); + uplink_priv->bond = NULL; + } + +out: + return ret; +} + +void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) || + !rpriv->uplink_priv.bond) + return; + + unregister_netdevice_notifier_dev_net(rpriv->netdev, + &rpriv->uplink_priv.bond->nb, + &rpriv->uplink_priv.bond->nn); + kvfree(rpriv->uplink_priv.bond); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c new file mode 100644 index 0000000000..5d128c5b45 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include +#include +#include +#include +#include "bridge.h" +#include "esw/bridge.h" +#include "en_rep.h" + +#define MLX5_ESW_BRIDGE_UPDATE_INTERVAL 1000 + +struct mlx5_bridge_switchdev_fdb_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + struct mlx5_esw_bridge_offloads *br_offloads; + bool add; +}; + +static bool mlx5_esw_bridge_dev_same_esw(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return esw == priv->mdev->priv.eswitch; +} + +static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev, *esw_mdev; + u64 system_guid, esw_system_guid; + + mdev = priv->mdev; + esw_mdev = esw->dev; + + system_guid = mlx5_query_nic_system_image_guid(mdev); + esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev); + + return system_guid == esw_system_guid; +} + +static struct net_device * +mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) +{ + struct net_device *lower; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5e_eswitch_rep(lower)) + continue; + + priv = netdev_priv(lower); + mdev = priv->mdev; + if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) + return lower; + } + + return NULL; +} + +static struct net_device * +mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw, + u16 *vport_num, u16 *esw_owner_vhca_id) +{ + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; + + if (netif_is_lag_master(dev)) + dev = mlx5_esw_bridge_lag_rep_get(dev, esw); + + if (!dev || !mlx5e_eswitch_rep(dev) || !mlx5_esw_bridge_dev_same_hw(dev, esw)) + return NULL; + + priv = netdev_priv(dev); + + if (!priv->mdev->priv.eswitch->br_offloads) + return NULL; + + rpriv = priv->ppriv; + *vport_num = rpriv->rep->vport; + *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id); + return dev; +} + +static struct net_device * +mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw, + u16 *vport_num, u16 *esw_owner_vhca_id) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (netif_is_lag_master(dev) || mlx5e_eswitch_rep(dev)) + return mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, vport_num, + esw_owner_vhca_id); + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + struct net_device *rep; + + if (netif_is_bridge_master(lower_dev)) + continue; + + rep = mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(lower_dev, esw, vport_num, + esw_owner_vhca_id); + if (rep) + return rep; + } + + return NULL; +} + +static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *rep, + struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5_esw_bridge_dev_same_esw(rep, esw)) + return false; + + priv = netdev_priv(rep); + mdev = priv->mdev; + if (netif_is_lag_master(dev)) + return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); + return true; +} + +static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, + struct mlx5_esw_bridge_offloads, + netdev_nb); + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper = info->upper_dev, *rep; + struct mlx5_eswitch *esw = br_offloads->esw; + u16 vport_num, esw_owner_vhca_id; + struct netlink_ext_ack *extack; + int err = 0; + + if (!netif_is_bridge_master(upper)) + return 0; + + rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id); + if (!rep) + return 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (mlx5_esw_bridge_is_local(dev, rep, esw)) + err = info->linking ? + mlx5_esw_bridge_vport_link(upper, vport_num, esw_owner_vhca_id, + br_offloads, extack) : + mlx5_esw_bridge_vport_unlink(upper, vport_num, esw_owner_vhca_id, + br_offloads, extack); + else if (mlx5_esw_bridge_dev_same_hw(rep, esw)) + err = info->linking ? + mlx5_esw_bridge_vport_peer_link(upper, vport_num, esw_owner_vhca_id, + br_offloads, extack) : + mlx5_esw_bridge_vport_peer_unlink(upper, vport_num, esw_owner_vhca_id, + br_offloads, extack); + + return err; +} + +static int +mlx5_esw_bridge_changeupper_validate_netdev(void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper = info->upper_dev; + struct net_device *lower; + struct list_head *iter; + + if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev)) + return 0; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5e_eswitch_rep(lower)) + continue; + + priv = netdev_priv(lower); + mdev = priv->mdev; + if (!mlx5_lag_is_active(mdev)) + return -EAGAIN; + if (!mlx5_lag_is_shared_fdb(mdev)) + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + int err = 0; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + err = mlx5_esw_bridge_changeupper_validate_netdev(ptr); + break; + + case NETDEV_CHANGEUPPER: + err = mlx5_esw_bridge_port_changeupper(nb, ptr); + break; + } + + return notifier_from_errno(err); +} + +static int +mlx5_esw_bridge_port_obj_add(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info); + const struct switchdev_obj *obj = port_obj_info->obj; + const struct switchdev_obj_port_vlan *vlan; + const struct switchdev_obj_port_mdb *mdb; + u16 vport_num, esw_owner_vhca_id; + int err; + + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_obj_info->handled = true; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid, + vlan->flags, br_offloads, extack); + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + mdb = SWITCHDEV_OBJ_PORT_MDB(obj); + err = mlx5_esw_bridge_port_mdb_add(dev, vport_num, esw_owner_vhca_id, mdb->addr, + mdb->vid, br_offloads, extack); + break; + default: + return -EOPNOTSUPP; + } + return err; +} + +static int +mlx5_esw_bridge_port_obj_del(struct net_device *dev, + struct switchdev_notifier_port_obj_info *port_obj_info, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + const struct switchdev_obj *obj = port_obj_info->obj; + const struct switchdev_obj_port_vlan *vlan; + const struct switchdev_obj_port_mdb *mdb; + u16 vport_num, esw_owner_vhca_id; + + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_obj_info->handled = true; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads); + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + mdb = SWITCHDEV_OBJ_PORT_MDB(obj); + mlx5_esw_bridge_port_mdb_del(dev, vport_num, esw_owner_vhca_id, mdb->addr, mdb->vid, + br_offloads); + break; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int +mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, + struct switchdev_notifier_port_attr_info *port_attr_info, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info); + const struct switchdev_attr *attr = port_attr_info->attr; + u16 vport_num, esw_owner_vhca_id; + int err = 0; + + if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + return 0; + + port_attr_info->handled = true; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + if (attr->u.brport_flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) { + NL_SET_ERR_MSG_MOD(extack, "Flag is not supported"); + err = -EINVAL; + } + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + err = mlx5_esw_bridge_ageing_time_set(vport_num, esw_owner_vhca_id, + attr->u.ageing_time, br_offloads); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id, + attr->u.vlan_filtering, br_offloads); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL: + err = mlx5_esw_bridge_vlan_proto_set(vport_num, + esw_owner_vhca_id, + attr->u.vlan_protocol, + br_offloads); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: + err = mlx5_esw_bridge_mcast_set(vport_num, esw_owner_vhca_id, + !attr->u.mc_disabled, br_offloads); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int mlx5_esw_bridge_event_blocking(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, + struct mlx5_esw_bridge_offloads, + nb_blk); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err; + + switch (event) { + case SWITCHDEV_PORT_OBJ_ADD: + err = mlx5_esw_bridge_port_obj_add(dev, ptr, br_offloads); + break; + case SWITCHDEV_PORT_OBJ_DEL: + err = mlx5_esw_bridge_port_obj_del(dev, ptr, br_offloads); + break; + case SWITCHDEV_PORT_ATTR_SET: + err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads); + break; + default: + err = 0; + } + + return notifier_from_errno(err); +} + +static void +mlx5_esw_bridge_cleanup_switchdev_fdb_work(struct mlx5_bridge_switchdev_fdb_work *fdb_work) +{ + dev_put(fdb_work->dev); + kfree(fdb_work->fdb_info.addr); + kfree(fdb_work); +} + +static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work) +{ + struct mlx5_bridge_switchdev_fdb_work *fdb_work = + container_of(work, struct mlx5_bridge_switchdev_fdb_work, work); + struct switchdev_notifier_fdb_info *fdb_info = + &fdb_work->fdb_info; + struct mlx5_esw_bridge_offloads *br_offloads = + fdb_work->br_offloads; + struct net_device *dev = fdb_work->dev; + u16 vport_num, esw_owner_vhca_id; + + rtnl_lock(); + + if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, + &esw_owner_vhca_id)) + goto out; + + if (fdb_work->add) + mlx5_esw_bridge_fdb_create(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); + else + mlx5_esw_bridge_fdb_remove(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); + +out: + rtnl_unlock(); + mlx5_esw_bridge_cleanup_switchdev_fdb_work(fdb_work); +} + +static struct mlx5_bridge_switchdev_fdb_work * +mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add, + struct switchdev_notifier_fdb_info *fdb_info, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_bridge_switchdev_fdb_work *work; + u8 *addr; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&work->work, mlx5_esw_bridge_switchdev_fdb_event_work); + memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info)); + + addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!addr) { + kfree(work); + return ERR_PTR(-ENOMEM); + } + ether_addr_copy(addr, fdb_info->addr); + work->fdb_info.addr = addr; + + dev_hold(dev); + work->dev = dev; + work->br_offloads = br_offloads; + work->add = add; + return work; +} + +static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, + struct mlx5_esw_bridge_offloads, + nb); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_fdb_info *fdb_info; + struct mlx5_bridge_switchdev_fdb_work *work; + struct mlx5_eswitch *esw = br_offloads->esw; + struct switchdev_notifier_info *info = ptr; + u16 vport_num, esw_owner_vhca_id; + struct net_device *upper, *rep; + + if (event == SWITCHDEV_PORT_ATTR_SET) { + int err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads); + + return notifier_from_errno(err); + } + + upper = netdev_master_upper_dev_get_rcu(dev); + if (!upper) + return NOTIFY_DONE; + if (!netif_is_bridge_master(upper)) + return NOTIFY_DONE; + + rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id); + if (!rep) + return NOTIFY_DONE; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_BRIDGE: + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + mlx5_esw_bridge_fdb_update_used(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); + break; + case SWITCHDEV_FDB_DEL_TO_BRIDGE: + /* only handle the event on peers */ + if (mlx5_esw_bridge_is_local(dev, rep, esw)) + break; + + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + /* Mark for deletion to prevent the update wq task from + * spuriously refreshing the entry which would mark it again as + * offloaded in SW bridge. After this fallthrough to regular + * async delete code. + */ + mlx5_esw_bridge_fdb_mark_deleted(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); + fallthrough; + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + + work = mlx5_esw_bridge_init_switchdev_fdb_work(dev, + event == SWITCHDEV_FDB_ADD_TO_DEVICE, + fdb_info, + br_offloads); + if (IS_ERR(work)) { + WARN_ONCE(1, "Failed to init switchdev work, err=%ld", + PTR_ERR(work)); + return notifier_from_errno(PTR_ERR(work)); + } + + queue_work(br_offloads->wq, &work->work); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static void mlx5_esw_bridge_update_work(struct work_struct *work) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(work, + struct mlx5_esw_bridge_offloads, + update_work.work); + + rtnl_lock(); + mlx5_esw_bridge_update(br_offloads); + rtnl_unlock(); + + queue_delayed_work(br_offloads->wq, &br_offloads->update_work, + msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL)); +} + +void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) +{ + struct mlx5_esw_bridge_offloads *br_offloads; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = + mdev->priv.eswitch; + int err; + + rtnl_lock(); + br_offloads = mlx5_esw_bridge_init(esw); + rtnl_unlock(); + if (IS_ERR(br_offloads)) { + esw_warn(mdev, "Failed to init esw bridge (err=%ld)\n", PTR_ERR(br_offloads)); + return; + } + + br_offloads->wq = alloc_ordered_workqueue("mlx5_bridge_wq", 0); + if (!br_offloads->wq) { + esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n"); + goto err_alloc_wq; + } + + br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event; + err = register_switchdev_notifier(&br_offloads->nb); + if (err) { + esw_warn(mdev, "Failed to register switchdev notifier (err=%d)\n", err); + goto err_register_swdev; + } + + br_offloads->nb_blk.notifier_call = mlx5_esw_bridge_event_blocking; + err = register_switchdev_blocking_notifier(&br_offloads->nb_blk); + if (err) { + esw_warn(mdev, "Failed to register blocking switchdev notifier (err=%d)\n", err); + goto err_register_swdev_blk; + } + + br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event; + err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); + if (err) { + esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n", + err); + goto err_register_netdev; + } + INIT_DELAYED_WORK(&br_offloads->update_work, mlx5_esw_bridge_update_work); + queue_delayed_work(br_offloads->wq, &br_offloads->update_work, + msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL)); + return; + +err_register_netdev: + unregister_switchdev_blocking_notifier(&br_offloads->nb_blk); +err_register_swdev_blk: + unregister_switchdev_notifier(&br_offloads->nb); +err_register_swdev: + destroy_workqueue(br_offloads->wq); +err_alloc_wq: + rtnl_lock(); + mlx5_esw_bridge_cleanup(esw); + rtnl_unlock(); +} + +void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5_esw_bridge_offloads *br_offloads; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = + mdev->priv.eswitch; + + br_offloads = esw->br_offloads; + if (!br_offloads) + return; + + cancel_delayed_work_sync(&br_offloads->update_work); + unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); + unregister_switchdev_blocking_notifier(&br_offloads->nb_blk); + unregister_switchdev_notifier(&br_offloads->nb); + destroy_workqueue(br_offloads->wq); + rtnl_lock(); + mlx5_esw_bridge_cleanup(esw); + rtnl_unlock(); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h new file mode 100644 index 0000000000..fbeb642428 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_REP_BRIDGE__ +#define __MLX5_EN_REP_BRIDGE__ + +#include "en.h" + +#if IS_ENABLED(CONFIG_MLX5_BRIDGE) + +void mlx5e_rep_bridge_init(struct mlx5e_priv *priv); +void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv); + +#else /* CONFIG_MLX5_BRIDGE */ + +static inline void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) {} +static inline void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) {} + +#endif /* CONFIG_MLX5_BRIDGE */ + +#endif /* __MLX5_EN_REP_BRIDGE__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c new file mode 100644 index 0000000000..2e9bee4e52 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "neigh.h" +#include "tc.h" +#include "en_rep.h" +#include "fs_core.h" +#include "diag/en_rep_tracepoint.h" + +static unsigned long mlx5e_rep_ipv6_interval(void) +{ + if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) + return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); + + return ~0UL; +} + +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) +{ + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); +} + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); +} + +static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + return refcount_inc_not_zero(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) { + mlx5e_rep_neigh_entry_remove(nhe); + kfree_rcu(nhe, rcu); + } +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh_hash_entry *next = NULL; + + rcu_read_lock(); + + for (next = nhe ? + list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &nhe->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list) : + list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list); + next; + next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, + &next->neigh_list, + struct mlx5e_neigh_hash_entry, + neigh_list)) + if (mlx5e_rep_neigh_entry_hold(next)) + break; + + rcu_read_unlock(); + + if (nhe) + mlx5e_rep_neigh_entry_release(nhe); + + return next; +} + +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) +{ + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + mlx5e_rep_queue_neigh_stats_work(priv); + + while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +struct neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct mlx5e_neigh_hash_entry *nhe; +}; + +static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work) +{ + neigh_release(update_work->n); + mlx5e_rep_neigh_entry_release(update_work->nhe); + kfree(update_work); +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct neigh_update_work *update_work = container_of(work, struct neigh_update_work, + work); + struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; + struct neighbour *n = update_work->n; + struct mlx5e_encap_entry *e = NULL; + bool neigh_connected, same_dev; + unsigned char ha[ETH_ALEN]; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and it's hw address. + */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + same_dev = READ_ONCE(nhe->neigh_dev) == n->dev; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + + if (!same_dev) + goto out; + + /* mlx5e_get_next_init_encap() releases previous encap before returning + * the next one. + */ + while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL) + mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha); + +out: + rtnl_unlock(); + mlx5e_release_neigh_update_work(update_work); +} + +static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv, + struct neighbour *n) +{ + struct neigh_update_work *update_work; + struct mlx5e_neigh_hash_entry *nhe; + struct mlx5e_neigh m_neigh = {}; + + update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); + if (WARN_ON(!update_work)) + return NULL; + + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + /* Obtain reference to nhe as last step in order not to release it in + * atomic context. + */ + rcu_read_lock(); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + rcu_read_unlock(); + if (!nhe) { + kfree(update_work); + return NULL; + } + + INIT_WORK(&update_work->work, mlx5e_rep_neigh_update); + neigh_hold(n); + update_work->n = n; + update_work->nhe = nhe; + + return update_work; +} + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct neigh_update_work *update_work; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + update_work = mlx5e_alloc_neigh_update_work(priv, n); + if (!update_work) + return NOTIFY_DONE; + + queue_work(priv->wq, &update_work->work); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay prob time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + rcu_read_lock(); + list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, + neigh_list) { + if (p->dev == READ_ONCE(nhe->neigh_dev)) { + found = true; + break; + } + } + rcu_read_unlock(); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + goto out_err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + mutex_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&neigh_update->netevent_nb); + if (err) + goto out_notifier; + return 0; + +out_notifier: + neigh_update->netevent_nb.notifier_call = NULL; + rhashtable_destroy(&neigh_update->neigh_ht); +out_err: + netdev_warn(rpriv->netdev, + "Failed to initialize neighbours handling for vport %d\n", + rpriv->rep->vport); + return err; +} + +void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + if (!rpriv->neigh_update.netevent_nb.notifier_call) + return; + + unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + mutex_destroy(&neigh_update->encap_lock); + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; + + mutex_lock(&rpriv->neigh_update.encap_lock); + + list_del_rcu(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + mutex_unlock(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under the representor's encap_lock or + * inside rcu read lock section. + */ +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_neigh_hash_entry *nhe; + + nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); + return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; +} + +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + (*nhe)->priv = priv; + memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh)); + spin_lock_init(&(*nhe)->encap_list_lock); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + WRITE_ONCE((*nhe)->neigh_dev, neigh_dev); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h new file mode 100644 index 0000000000..6fe0ab9709 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __MLX5_EN_REP_NEIGH__ +#define __MLX5_EN_REP_NEIGH__ + +#include "en.h" +#include "en_rep.h" + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv); + +struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); +int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev, + struct mlx5e_neigh_hash_entry **nhe); +void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline int +mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __MLX5_EN_REP_NEIGH__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c new file mode 100644 index 0000000000..b12fe3c5a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -0,0 +1,750 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies. */ + +#include +#include +#include +#include +#include +#include +#include +#include "tc.h" +#include "neigh.h" +#include "en_rep.h" +#include "eswitch.h" +#include "lib/fs_chains.h" +#include "en/tc_ct.h" +#include "en/mapping.h" +#include "en/tc_tun.h" +#include "lib/port_tun.h" +#include "en/tc/sample.h" +#include "en_accel/ipsec_rxtx.h" +#include "en/tc/int_port.h" +#include "en/tc/act/act.h" + +struct mlx5e_rep_indr_block_priv { + struct net_device *netdev; + struct mlx5e_rep_priv *rpriv; + enum flow_block_binder_type binder_type; + + struct list_head list; +}; + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; + struct mlx5e_neigh_hash_entry *nhe; + int err; + + err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type); + if (err) + return err; + + mutex_lock(&rpriv->neigh_update.encap_lock); + nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh); + if (!nhe) { + err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe); + if (err) { + mutex_unlock(&rpriv->neigh_update.encap_lock); + mlx5_tun_entropy_refcount_dec(tun_entropy, + e->reformat_type); + return err; + } + } + + e->nhe = nhe; + spin_lock(&nhe->encap_list_lock); + list_add_rcu(&e->encap_list, &nhe->encap_list); + spin_unlock(&nhe->encap_list_lock); + + mutex_unlock(&rpriv->neigh_update.encap_lock); + + return 0; +} + +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; + + if (!e->nhe) + return; + + spin_lock(&e->nhe->encap_list_lock); + list_del_rcu(&e->encap_list); + spin_unlock(&e->nhe->encap_list_lock); + + mlx5e_rep_neigh_entry_release(e->nhe); + e->nhe = NULL; + mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); +} + +void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]) +{ + struct ethhdr *eth = (struct ethhdr *)e->encap_header; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool encap_connected; + LIST_HEAD(flow_list); + + ASSERT_RTNL(); + + mutex_lock(&esw->offloads.encap_tbl_lock); + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha)) + goto unlock; + + mlx5e_take_all_encap_flows(e, &flow_list); + + if ((e->flags & MLX5_ENCAP_ENTRY_VALID) && + (!neigh_connected || !ether_addr_equal(e->h_dest, ha))) + mlx5e_tc_encap_flows_del(priv, e, &flow_list); + + if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + struct net_device *route_dev; + + ether_addr_copy(e->h_dest, ha); + ether_addr_copy(eth->h_dest, ha); + /* Update the encap source mac, in case that we delete + * the flows when encap source mac changed. + */ + route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex); + if (route_dev) + ether_addr_copy(eth->h_source, route_dev->dev_addr); + + mlx5e_tc_encap_flows_add(priv, e, &flow_list); + } +unlock: + mutex_unlock(&esw->offloads.encap_tbl_lock); + mlx5e_put_flow_list(priv, &flow_list); +} + +static int +mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct flow_cls_offload *cls_flower, int flags) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_DESTROY: + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_STATS: + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); + default: + return -EOPNOTSUPP; + } +} + +static +int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + switch (ma->command) { + case TC_CLSMATCHALL_REPLACE: + return mlx5e_tc_configure_matchall(priv, ma); + case TC_CLSMATCHALL_DESTROY: + return mlx5e_tc_delete_matchall(priv, ma); + case TC_CLSMATCHALL_STATS: + mlx5e_tc_stats_matchall(priv, ma); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); + struct mlx5e_priv *priv = cb_priv; + + if (!priv->netdev || !netif_device_present(priv->netdev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags); + case TC_SETUP_CLSMATCHALL: + return mlx5e_rep_setup_tc_cls_matchall(priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload tmp, *f = type_data; + struct mlx5e_priv *priv = cb_priv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + flags = MLX5_TC_FLAG(INGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + esw = priv->mdev->priv.eswitch; + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + if (!mlx5_chains_prios_supported(esw_chains(esw))) + return -EOPNOTSUPP; + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw))) + return -EOPNOTSUPP; + if (tmp.common.chain_index != 0) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw)); + tmp.common.prio++; + err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(mlx5e_rep_block_tc_cb_list); +static LIST_HEAD(mlx5e_rep_block_ft_cb_list); +int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; + + f->unlocked_driver_cb = true; + + switch (type) { + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_tc_cb_list, + mlx5e_rep_setup_tc_cb, + priv, priv, true); + case TC_SETUP_FT: + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_ft_cb_list, + mlx5e_rep_setup_ft_cb, + priv, priv, true); + default: + return -EOPNOTSUPP; + } +} + +int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + int err; + + mutex_init(&uplink_priv->unready_flows_lock); + INIT_LIST_HEAD(&uplink_priv->unready_flows); + + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(uplink_priv); + return err; +} + +void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) +{ + /* delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv); + mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); +} + +void mlx5e_rep_tc_enable(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work, + mlx5e_tc_reoffload_flows_work); +} + +void mlx5e_rep_tc_disable(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); +} + +int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work); + + return NOTIFY_OK; +} + +static struct mlx5e_rep_indr_block_priv * +mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev, + enum flow_block_binder_type binder_type) +{ + struct mlx5e_rep_indr_block_priv *cb_priv; + + list_for_each_entry(cb_priv, + &rpriv->uplink_priv.tc_indr_block_priv_list, + list) + if (cb_priv->netdev == netdev && + cb_priv->binder_type == binder_type) + return cb_priv; + + return NULL; +} + +static int +mlx5e_rep_indr_offload(struct net_device *netdev, + struct flow_cls_offload *flower, + struct mlx5e_rep_indr_block_priv *indr_priv, + unsigned long flags) +{ + struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); + int err = 0; + + if (!netif_device_present(indr_priv->rpriv->netdev)) + return -EOPNOTSUPP; + + switch (flower->command) { + case FLOW_CLS_REPLACE: + err = mlx5e_configure_flower(netdev, priv, flower, flags); + break; + case FLOW_CLS_DESTROY: + err = mlx5e_delete_flower(netdev, priv, flower, flags); + break; + case FLOW_CLS_STATS: + err = mlx5e_stats_flower(netdev, priv, flower, flags); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD); + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + + flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ? + MLX5_TC_FLAG(EGRESS) : + MLX5_TC_FLAG(INGRESS); + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, + flags); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + struct flow_cls_offload *f = type_data; + struct flow_cls_offload tmp; + struct mlx5e_priv *mpriv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + mpriv = netdev_priv(priv->rpriv->netdev); + esw = mpriv->mdev->priv.eswitch; + + flags = MLX5_TC_FLAG(EGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (!mlx5_chains_prios_supported(esw_chains(esw)) || + tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) || + tmp.common.chain_index) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw)); + tmp.common.prio++; + err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static void mlx5e_rep_indr_block_unbind(void *cb_priv) +{ + struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; + + list_del(&indr_priv->list); + kfree(indr_priv); +} + +static LIST_HEAD(mlx5e_block_cb_list); + +static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev) +{ + struct macvlan_dev *macvlan = netdev_priv(dev); + + return macvlan->mode == MACVLAN_MODE_PASSTHRU; +} + +static bool +mlx5e_rep_check_indr_block_supported(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev, + struct flow_block_offload *f) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *macvlan_real_dev; + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + return false; + + if (mlx5e_tc_tun_device_to_offload(priv, netdev)) + return true; + + if (is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) + return true; + + if (netif_is_macvlan(netdev)) { + if (!mlx5e_rep_macvlan_mode_supported(netdev)) { + netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode"); + return false; + } + + macvlan_real_dev = macvlan_dev_real_dev(netdev); + + if (macvlan_real_dev == rpriv->netdev) + return true; + if (netif_is_bond_master(macvlan_real_dev)) + return true; + } + + if (netif_is_ovs_master(netdev) && f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && + mlx5e_tc_int_port_supported(esw)) + return true; + + return false; +} + +static int +mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch, + struct mlx5e_rep_priv *rpriv, + struct flow_block_offload *f, + flow_setup_cb_t *setup_cb, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct mlx5e_rep_indr_block_priv *indr_priv; + struct flow_block_cb *block_cb; + + if (!mlx5e_rep_check_indr_block_supported(rpriv, netdev, f)) + return -EOPNOTSUPP; + + f->unlocked_driver_cb = true; + f->driver_block_list = &mlx5e_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type); + if (indr_priv) + return -EEXIST; + + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->rpriv = rpriv; + indr_priv->binder_type = f->binder_type; + list_add(&indr_priv->list, + &rpriv->uplink_priv.tc_indr_block_priv_list); + + block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv, + mlx5e_rep_indr_block_unbind, + f, netdev, sch, data, rpriv, + cleanup); + if (IS_ERR(block_cb)) { + list_del(&indr_priv->list); + kfree(indr_priv); + return PTR_ERR(block_cb); + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list); + + return 0; + case FLOW_BLOCK_UNBIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type); + if (!indr_priv) + return -ENOENT; + + block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv); + if (!block_cb) + return -ENOENT; + + flow_indr_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int +mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) + +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct flow_action_entry *action; + struct mlx5e_tc_act *act; + bool add = false; + int i; + + /* There is no use case currently for more than one action (e.g. pedit). + * when there will be, need to handle cleaning multiple actions on err. + */ + if (!flow_offload_has_one_action(&fl_act->action)) + return -EOPNOTSUPP; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + flow_action_for_each(i, action, &fl_act->action) { + act = mlx5e_tc_act_get(action->id, ns_type); + if (!act) + continue; + + if (!act->offload_action) + continue; + + if (!act->offload_action(priv, fl_act, action)) + add = true; + } + + return add ? 0 : -EOPNOTSUPP; +} + +static int +mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_tc_act *act; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + act = mlx5e_tc_act_get(fl_act->id, ns_type); + if (!act || !act->destroy_action) + return -EOPNOTSUPP; + + return act->destroy_action(priv, fl_act); +} + +static int +mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) + +{ + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_tc_act *act; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + ns_type = MLX5_FLOW_NAMESPACE_FDB; + else + ns_type = MLX5_FLOW_NAMESPACE_KERNEL; + + act = mlx5e_tc_act_get(fl_act->id, ns_type); + if (!act || !act->stats_action) + return mlx5e_tc_fill_action_stats(priv, fl_act); + + return act->stats_action(priv, fl_act); +} + +static int +mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv, + struct flow_offload_action *fl_act) +{ + switch (fl_act->command) { + case FLOW_ACT_REPLACE: + return mlx5e_rep_indr_replace_act(rpriv, fl_act); + case FLOW_ACT_DESTROY: + return mlx5e_rep_indr_destroy_act(rpriv, fl_act); + case FLOW_ACT_STATS: + return mlx5e_rep_indr_stats_act(rpriv, fl_act); + default: + return -EOPNOTSUPP; + } +} + +static int +mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv, + enum tc_setup_type type, + void *data) +{ + if (!data) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ACT: + return mlx5e_rep_indr_setup_act(rpriv, data); + default: + return -EOPNOTSUPP; + } +} + +static +int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, + enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + if (!netdev) + return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data); + + switch (type) { + case TC_SETUP_BLOCK: + return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data, + mlx5e_rep_indr_setup_tc_cb, + data, cleanup); + case TC_SETUP_FT: + return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data, + mlx5e_rep_indr_setup_ft_cb, + data, cleanup); + default: + return -EOPNOTSUPP; + } +} + +int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + + return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv); +} + +void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) +{ + flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv, + mlx5e_rep_indr_block_unbind); +} + +void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb) +{ + u32 reg_c0, reg_c1, zone_restore_id, tunnel_id; + struct mlx5e_tc_update_priv tc_priv = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_tc_ct_priv *ct_priv; + struct mapping_ctx *mapping_ctx; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + + reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); + if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) + goto forward; + + /* If mapped_obj_id is not equal to the default flow tag then skb->mark + * is not supported and must be reset back to 0. + */ + skb->mark = 0; + + priv = netdev_priv(skb->dev); + esw = priv->mdev->priv.eswitch; + mapping_ctx = esw->offloads.reg_c0_obj_pool; + reg_c1 = be32_to_cpu(cqe->ft_metadata); + zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; + tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + ct_priv = uplink_priv->ct_priv; + +#ifdef CONFIG_MLX5_EN_IPSEC + if (!(tunnel_id >> ESW_TUN_OPTS_BITS)) { + u32 mapped_id; + u32 metadata; + + mapped_id = tunnel_id & ESW_IPSEC_RX_MAPPED_ID_MASK; + if (mapped_id && + !mlx5_esw_ipsec_rx_make_metadata(priv, mapped_id, &metadata)) + mlx5e_ipsec_offload_handle_rx_skb(priv->netdev, skb, metadata); + } +#endif + + if (!mlx5e_tc_update_skb(cqe, skb, mapping_ctx, reg_c0, ct_priv, + zone_restore_id, tunnel_id, &tc_priv)) + goto free_skb; + +forward: + if (tc_priv.skb_done) + goto free_skb; + + if (tc_priv.forward_tx) + dev_queue_xmit(skb); + else + napi_gro_receive(rq->cq.napi, skb); + + dev_put(tc_priv.fwd_dev); + + return; + +free_skb: + dev_put(tc_priv.fwd_dev); + dev_kfree_skb_any(skb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h new file mode 100644 index 0000000000..7c9dd3a75f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __MLX5_EN_REP_TC_H__ +#define __MLX5_EN_REP_TC_H__ + +#include +#include "en_tc.h" +#include "en_rep.h" + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv); + +int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv); + +void mlx5e_rep_tc_enable(struct mlx5e_priv *priv); +void mlx5e_rep_tc_disable(struct mlx5e_priv *priv); + +int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv); + +void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]); + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev); +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); + +void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb); + +#else /* CONFIG_MLX5_CLS_ACT */ + +struct mlx5e_rep_priv; +static inline int +mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {} + +static inline int +mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; } +static inline void +mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {} + +static inline void +mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {} +static inline void +mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {} + +static inline int +mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; } + +static inline int +mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { return -EOPNOTSUPP; } + +static inline void +mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, + struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); } + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __MLX5_EN_REP_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c new file mode 100644 index 0000000000..03b119a434 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -0,0 +1,793 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Mellanox Technologies. + +#include "health.h" +#include "params.h" +#include "txrx.h" +#include "devlink.h" +#include "ptp.h" +#include "lib/tout.h" + +/* Keep this string array consistent with the MLX5E_RQ_STATE_* enums in en.h */ +static const char * const rq_sw_state_type_name[] = { + [MLX5E_RQ_STATE_ENABLED] = "enabled", + [MLX5E_RQ_STATE_RECOVERING] = "recovering", + [MLX5E_RQ_STATE_DIM] = "dim", + [MLX5E_RQ_STATE_NO_CSUM_COMPLETE] = "no_csum_complete", + [MLX5E_RQ_STATE_CSUM_FULL] = "csum_full", + [MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx", + [MLX5E_RQ_STATE_SHAMPO] = "shampo", + [MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced", + [MLX5E_RQ_STATE_XSK] = "xsk", +}; + +static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) +{ + int outlen = MLX5_ST_SZ_BYTES(query_rq_out); + void *out; + void *rqc; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_rq(dev, rqn, out); + if (err) + goto out; + + rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context); + *state = MLX5_GET(rqc, rqc, state); + +out: + kvfree(out); + return err; +} + +static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) +{ + struct mlx5_core_dev *dev = icosq->channel->mdev; + unsigned long exp_time; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); + + while (time_before(jiffies, exp_time)) { + if (icosq->cc == icosq->pc) + return 0; + + msleep(20); + } + + netdev_err(icosq->channel->netdev, + "Wait for ICOSQ 0x%x flush timeout (cc = 0x%x, pc = 0x%x)\n", + icosq->sqn, icosq->cc, icosq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) +{ + WARN_ONCE(icosq->cc != icosq->pc, "ICOSQ 0x%x: cc (0x%x) != pc (0x%x)\n", + icosq->sqn, icosq->cc, icosq->pc); + icosq->cc = 0; + icosq->pc = 0; +} + +static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) +{ + struct mlx5e_rq *xskrq = NULL; + struct mlx5_core_dev *mdev; + struct mlx5e_icosq *icosq; + struct net_device *dev; + struct mlx5e_rq *rq; + u8 state; + int err; + + icosq = ctx; + + mutex_lock(&icosq->channel->icosq_recovery_lock); + + /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ + rq = &icosq->channel->rq; + if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) + xskrq = &icosq->channel->xskrq; + mdev = icosq->channel->mdev; + dev = icosq->channel->netdev; + err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query ICOSQ 0x%x state. err = %d\n", + icosq->sqn, err); + goto out; + } + + if (state != MLX5_SQC_STATE_ERR) + goto out; + + mlx5e_deactivate_rq(rq); + if (xskrq) + mlx5e_deactivate_rq(xskrq); + + err = mlx5e_wait_for_icosq_flush(icosq); + if (err) + goto out; + + mlx5e_deactivate_icosq(icosq); + + /* At this point, both the rq and the icosq are disabled */ + + err = mlx5e_health_sq_to_ready(mdev, dev, icosq->sqn); + if (err) + goto out; + + mlx5e_reset_icosq_cc_pc(icosq); + + mlx5e_free_rx_missing_descs(rq); + if (xskrq) + mlx5e_free_rx_missing_descs(xskrq); + + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mlx5e_activate_icosq(icosq); + + mlx5e_activate_rq(rq); + rq->stats->recover++; + + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; + } + + mlx5e_trigger_napi_icosq(icosq->channel); + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + + return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return err; +} + +static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) +{ + struct mlx5e_rq *rq = ctx; + int err; + + mlx5e_deactivate_rq(rq); + err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR); + clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); + if (err) + return err; + + mlx5e_activate_rq(rq); + rq->stats->recover++; + if (rq->channel) + mlx5e_trigger_napi_icosq(rq->channel); + else + mlx5e_trigger_napi_sched(rq->cq.napi); + return 0; +} + +static int mlx5e_rx_reporter_timeout_recover(void *ctx) +{ + struct mlx5_eq_comp *eq; + struct mlx5e_rq *rq; + int err; + + rq = ctx; + eq = rq->cq.mcq.eq; + + err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats); + if (err && rq->icosq) + clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state); + + return err; +} + +static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->ctx); +} + +static int mlx5e_rx_reporter_recover(struct devlink_health_reporter *reporter, + void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_rx_reporter_recover_from_ctx(err_ctx) : + mlx5e_health_recover_channels(priv); +} + +static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, + struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "WQE size", + mlx5_wq_cyc_get_size(&icosq->wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_rq *rq) +{ + int err; + int i; + + BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES, + "rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(rq_sw_state_type_name); ++i) { + err = devlink_fmsg_u32_pair_put(fmsg, rq_sw_state_type_name[i], + test_bit(i, &rq->state)); + if (err) + return err; + } + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + u16 wqe_counter; + int wqes_sz; + u8 hw_state; + u16 wq_head; + int err; + + err = mlx5e_query_rq_state(rq->mdev, rq->rqn, &hw_state); + if (err) + return err; + + wqes_sz = mlx5e_rqwq_get_cur_sz(rq); + wq_head = mlx5e_rqwq_get_head(rq); + wqe_counter = mlx5e_rqwq_get_wqe_counter(rq); + + err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head); + if (err) + return err; + + err = mlx5e_health_rq_put_sw_state(fmsg, rq); + if (err) + return err; + + err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); + if (err) + return err; + + err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg); + if (err) + return err; + + if (rq->icosq) { + struct mlx5e_icosq *icosq = rq->icosq; + u8 icosq_hw_state; + + err = mlx5_core_query_sq_state(rq->mdev, icosq->sqn, &icosq_hw_state); + if (err) + return err; + + err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg); + if (err) + return err; + } + + return 0; +} + +static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix); + if (err) + return err; + + err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = rq->priv; + struct mlx5e_params *params; + u32 rq_stride, rq_sz; + bool real_time; + int err; + + params = &priv->channels.params; + rq_sz = mlx5e_rqwq_get_size(rq); + real_time = mlx5_is_real_time_rq(priv->mdev); + rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); + if (err) + return err; + + err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch, + struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter); + if (err) + return err; + + err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); + if (err) + return err; + + err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg); + if (err) + return err; + + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg); + if (err) + return err; + } + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); + if (err) + return err; + + err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); + if (err) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5e_rq *rq; + + rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ? + &c->xskrq : &c->rq; + + err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); + if (err) + goto unlock; + } + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); + if (err) + goto unlock; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_txqsq *icosq = ctx; + struct mlx5_rsc_key key = {}; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = icosq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_rq *rq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = rq->rqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_RCV_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + struct mlx5_rsc_key key = {}; + int i, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_RX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + + err = mlx5e_health_queue_dump(priv, fmsg, rq->rqn, "RQ"); + if (err) + return err; + } + + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ"); + if (err) + return err; + } + + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_rx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_rx_reporter_dump_all_rqs(priv, fmsg); +} + +void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) +{ + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_icosq *icosq = rq->icosq; + struct mlx5e_priv *priv = rq->priv; + struct mlx5e_err_ctx err_ctx = {}; + char icosq_str[32] = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_timeout_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + + if (icosq) + snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn); + snprintf(err_str, sizeof(err_str), + "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x", + rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq) +{ + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_priv *priv = rq->priv; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = rq; + err_ctx.recover = mlx5e_rx_reporter_err_rq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_rq; + snprintf(err_str, sizeof(err_str), "ERR CQE on RQ: 0x%x", rq->rqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) +{ + struct mlx5e_priv *priv = icosq->channel->priv; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = icosq; + err_ctx.recover = mlx5e_rx_reporter_err_icosq_cqe_recover; + err_ctx.dump = mlx5e_rx_reporter_dump_icosq; + snprintf(err_str, sizeof(err_str), "ERR CQE on ICOSQ: 0x%x", icosq->sqn); + + mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); +} + +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) +{ + mutex_lock(&c->icosq_recovery_lock); +} + +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) +{ + mutex_unlock(&c->icosq_recovery_lock); +} + +static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { + .name = "rx", + .recover = mlx5e_rx_reporter_recover, + .diagnose = mlx5e_rx_reporter_diagnose, + .dump = mlx5e_rx_reporter_dump, +}; + +#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 + +void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) +{ + struct devlink_health_reporter *reporter; + + reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + &mlx5_rx_reporter_ops, + MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); + if (IS_ERR(reporter)) { + netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", + PTR_ERR(reporter)); + return; + } + priv->rx_reporter = reporter; +} + +void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) +{ + if (!priv->rx_reporter) + return; + + devlink_health_reporter_destroy(priv->rx_reporter); + priv->rx_reporter = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c new file mode 100644 index 0000000000..ff8242f67c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -0,0 +1,721 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "health.h" +#include "en/ptp.h" +#include "en/devlink.h" +#include "lib/tout.h" + +/* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */ +static const char * const sq_sw_state_type_name[] = { + [MLX5E_SQ_STATE_ENABLED] = "enabled", + [MLX5E_SQ_STATE_MPWQE] = "mpwqe", + [MLX5E_SQ_STATE_RECOVERING] = "recovering", + [MLX5E_SQ_STATE_IPSEC] = "ipsec", + [MLX5E_SQ_STATE_DIM] = "dim", + [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", + [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", + [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", + [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", +}; + +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *dev = sq->mdev; + unsigned long exp_time; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR)); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq) +{ + int err; + int i; + + BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES, + "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) { + err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i], + test_bit(i, &sq->state)); + if (err) + return err; + } + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) +{ + struct mlx5_core_dev *mdev; + struct net_device *dev; + struct mlx5e_txqsq *sq; + u8 state; + int err; + + sq = ctx; + mdev = sq->mdev; + dev = sq->netdev; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + goto out; + } + + if (state != MLX5_SQC_STATE_ERR) + goto out; + + mlx5e_tx_disable_queue(sq->txq); + + err = mlx5e_wait_for_sq_flush(sq); + if (err) + goto out; + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. + */ + + err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn); + if (err) + goto out; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + mlx5e_activate_txqsq(sq); + if (sq->channel) + mlx5e_trigger_napi_icosq(sq->channel); + else + mlx5e_trigger_napi_sched(sq->cq.napi); + + return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + return err; +} + +struct mlx5e_tx_timeout_ctx { + struct mlx5e_txqsq *sq; + signed int status; +}; + +static int mlx5e_tx_reporter_timeout_recover(void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx; + struct mlx5e_priv *priv; + struct mlx5_eq_comp *eq; + struct mlx5e_txqsq *sq; + int err; + + to_ctx = ctx; + sq = to_ctx->sq; + eq = sq->cq.mcq.eq; + priv = sq->priv; + err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats); + if (!err) { + to_ctx->status = 0; /* this sq recovered */ + return err; + } + + err = mlx5e_safe_reopen_channels(priv); + if (!err) { + to_ctx->status = 1; /* all channels recovered */ + return err; + } + + to_ctx->status = err; + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_err(priv->netdev, + "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", + err); + + return err; +} + +static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) +{ + struct mlx5e_ptpsq *ptpsq = ctx; + struct mlx5e_channels *chs; + struct net_device *netdev; + struct mlx5e_priv *priv; + int carrier_ok; + int err; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &ptpsq->txqsq.state)) + return 0; + + priv = ptpsq->txqsq.priv; + + mutex_lock(&priv->state_lock); + chs = &priv->channels; + netdev = priv->netdev; + + carrier_ok = netif_carrier_ok(netdev); + netif_carrier_off(netdev); + + mlx5e_deactivate_priv_channels(priv); + + mlx5e_ptp_close(chs->ptp); + err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); + + mlx5e_activate_priv_channels(priv); + + /* return carrier back if needed */ + if (carrier_ok) + netif_carrier_on(netdev); + + mutex_unlock(&priv->state_lock); + + return err; +} + +/* state lock cannot be grabbed within this function. + * It can cause a dead lock or a read-after-free. + */ +static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->ctx); +} + +static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, + void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : + mlx5e_health_recover_channels(priv); +} + +static int +mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, + struct mlx5e_txqsq *sq, int tc) +{ + bool stopped = netif_xmit_stopped(sq->txq); + struct mlx5e_priv *priv = sq->priv; + u8 state; + int err; + + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc); + if (err) + return err; + + err = mlx5e_health_sq_put_sw_state(fmsg, sq); + if (err) + return err; + + err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); + if (err) + return err; + + return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); +} + +static int +mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, + struct mlx5e_txqsq *sq, int tc) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix); + if (err) + return err; + + err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int +mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg, + struct mlx5e_ptpsq *ptpsq, int tc) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); + if (err) + return err; + + err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); + if (err) + return err; + + err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int +mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, + struct mlx5e_txqsq *txqsq) +{ + u32 sq_stride, sq_sz; + bool real_time; + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); + if (err) + return err; + + real_time = mlx5_is_real_time_sq(txqsq->mdev); + sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); + sq_stride = MLX5_SEND_WQE_BB; + + err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); + if (err) + return err; + + err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg, + struct mlx5e_ptpsq *ptpsq) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS"); + if (err) + return err; + + err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + struct mlx5e_ptpsq *generic_ptpsq; + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); + if (err) + return err; + + err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq); + if (err) + return err; + + if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) + goto out; + + generic_ptpsq = &ptp_ch->ptpsq[0]; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); + if (err) + return err; + + err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq); + if (err) + return err; + + err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + +out: + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + + int i, tc, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); + if (err) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; + + err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc); + if (err) + goto unlock; + } + } + + if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) + goto close_sqs_nest; + + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { + err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg, + &ptp_ch->ptpsq[tc], + tc); + if (err) + goto unlock; + } + +close_sqs_nest: + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + goto unlock; + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5_rsc_key key = {}; + struct mlx5e_txqsq *sq = ctx; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_FULL_QPC; + key.index1 = sq->sqn; + key.num_of_obj1 = 1; + + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); + if (err) + return err; + + key.rsc = MLX5_SGMT_TYPE_SND_BUFF; + key.num_of_obj2 = MLX5_RSC_DUMP_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + +static int mlx5e_tx_reporter_ptpsq_unhealthy_dump(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_ptpsq *ptpsq = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, &ptpsq->txqsq); +} + +static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + struct mlx5_rsc_key key = {}; + int i, tc, err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); + if (err) + return err; + + key.size = PAGE_SIZE; + key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL; + err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + return err; + + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { + struct mlx5e_txqsq *sq = &c->sq[tc]; + + err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ"); + if (err) + return err; + } + } + + if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { + for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) { + struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; + + err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ"); + if (err) + return err; + } + } + + return devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv, + struct mlx5e_err_ctx *err_ctx, + struct devlink_fmsg *fmsg) +{ + return err_ctx->dump(priv, fmsg, err_ctx->ctx); +} + +static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_err_ctx *err_ctx = context; + + return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) : + mlx5e_tx_reporter_dump_all_sqs(priv, fmsg); +} + +void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_priv *priv = sq->priv; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + err_ctx.dump = mlx5e_tx_reporter_dump_sq; + snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + +int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_tx_timeout_ctx to_ctx = {}; + struct mlx5e_priv *priv = sq->priv; + struct mlx5e_err_ctx err_ctx = {}; + + to_ctx.sq = sq; + err_ctx.ctx = &to_ctx; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; + snprintf(err_str, sizeof(err_str), + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", + sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start))); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); + return to_ctx.status; +} + +void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq) +{ + struct mlx5e_ptp_metadata_map *map = &ptpsq->metadata_map; + char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_txqsq *txqsq = &ptpsq->txqsq; + struct mlx5e_cq *ts_cq = &ptpsq->ts_cq; + struct mlx5e_priv *priv = txqsq->priv; + struct mlx5e_err_ctx err_ctx = {}; + + err_ctx.ctx = ptpsq; + err_ctx.recover = mlx5e_tx_reporter_ptpsq_unhealthy_recover; + err_ctx.dump = mlx5e_tx_reporter_ptpsq_unhealthy_dump; + snprintf(err_str, sizeof(err_str), + "Unhealthy TX port TS queue: %d, SQ: 0x%x, CQ: 0x%x, Undelivered CQEs: %u Map Capacity: %u", + txqsq->ch_ix, txqsq->sqn, ts_cq->mcq.cqn, map->undelivered_counter, map->capacity); + + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { + .name = "tx", + .recover = mlx5e_tx_reporter_recover, + .diagnose = mlx5e_tx_reporter_diagnose, + .dump = mlx5e_tx_reporter_dump, +}; + +#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 + +void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) +{ + struct devlink_health_reporter *reporter; + + reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + &mlx5_tx_reporter_ops, + MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); + if (IS_ERR(reporter)) { + netdev_warn(priv->netdev, + "Failed to create tx reporter, err = %ld\n", + PTR_ERR(reporter)); + return; + } + priv->tx_reporter = reporter; +} + +void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) +{ + if (!priv->tx_reporter) + return; + + devlink_health_reporter_destroy(priv->tx_reporter); + priv->tx_reporter = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c new file mode 100644 index 0000000000..b915fb29dd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "rqt.h" +#include + +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir->table[i] = i % num_channels; +} + +static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u16 max_size, u32 *init_rqns, u16 init_size) +{ + void *rqtc; + int inlen; + int err; + u32 *in; + int i; + + rqt->mdev = mdev; + rqt->size = max_size; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size); + for (i = 0; i < init_size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]); + + err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn); + + kvfree(in); + return err; +} + +int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + bool indir_enabled, u32 init_rqn) +{ + u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1; + + return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1); +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + + return inv; +} + +static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { + unsigned int ix = i; + + if (hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE)); + + ix = indir->table[ix]; + + if (WARN_ON(ix >= num_rqns)) + /* Could be a bug in the driver or in the kernel part of + * ethtool: indir table refers to non-existent RQs. + */ + return -EINVAL; + rss_rqns[i] = rqns[ix]; + } + + return 0; +} + +int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + u32 *rss_rqns; + int err; + + rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL); + if (!rss_rqns) + return -ENOMEM; + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (err) + goto out; + + err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE); + +out: + kvfree(rss_rqns); + return err; +} + +void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt) +{ + mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn); +} + +static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size) +{ + unsigned int i; + void *rqtc; + int inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); + + MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); + MLX5_SET(rqtc, rqtc, rqt_actual_size, size); + for (i = 0; i < size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]); + + err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen); + + kvfree(in); + return err; +} + +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn) +{ + return mlx5e_rqt_redirect(rqt, &rqn, 1); +} + +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir) +{ + u32 *rss_rqns; + int err; + + if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE)) + return -EINVAL; + + rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL); + if (!rss_rqns) + return -ENOMEM; + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (err) + goto out; + + err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE); + +out: + kvfree(rss_rqns); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h new file mode 100644 index 0000000000..60c985a12f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_RQT_H__ +#define __MLX5_EN_RQT_H__ + +#include + +#define MLX5E_INDIR_RQT_SIZE (1 << 8) + +struct mlx5_core_dev; + +struct mlx5e_rss_params_indir { + u32 table[MLX5E_INDIR_RQT_SIZE]; +}; + +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels); + +struct mlx5e_rqt { + struct mlx5_core_dev *mdev; + u32 rqtn; + u16 size; +}; + +int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + bool indir_enabled, u32 init_rqn); +int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, + u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir); +void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt); + +static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt) +{ + return rqt->rqtn; +} + +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn); +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, + u8 hfunc, struct mlx5e_rss_params_indir *indir); + +#endif /* __MLX5_EN_RQT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c new file mode 100644 index 0000000000..7f93426b88 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -0,0 +1,606 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. + +#include "rss.h" + +#define mlx5e_rss_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = { + [MLX5_TT_IPV4_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV6_TCP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV4_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV6_UDP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5_TT_IPV4] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, + [MLX5_TT_IPV6] = { + .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, +}; + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt) +{ + return rss_default_config[tt]; +} + +struct mlx5e_rss { + struct mlx5e_rss_params_hash hash; + struct mlx5e_rss_params_indir indir; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_rqt rqt; + struct mlx5_core_dev *mdev; + u32 drop_rqn; + bool inner_ft_support; + bool enabled; + refcount_t refcnt; +}; + +struct mlx5e_rss *mlx5e_rss_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rss), GFP_KERNEL); +} + +void mlx5e_rss_free(struct mlx5e_rss *rss) +{ + kvfree(rss); +} + +static void mlx5e_rss_params_init(struct mlx5e_rss *rss) +{ + enum mlx5_traffic_types tt; + + rss->hash.hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(rss->hash.toeplitz_hash_key, + sizeof(rss->hash.toeplitz_hash_key)); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + rss->rx_hash_fields[tt] = + mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; +} + +static struct mlx5e_tir **rss_get_tirp(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + return inner ? &rss->inner_tir[tt] : &rss->tir[tt]; +} + +static struct mlx5e_tir *rss_get_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + return *rss_get_tirp(rss, tt, inner); +} + +static struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + + rss_tt = mlx5e_rss_get_default_tt_config(tt); + rss_tt.rx_hash_fields = rss->rx_hash_fields[tt]; + return rss_tt; +} + +static int mlx5e_rss_create_tir(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir **tir_p; + struct mlx5e_tir *tir; + u32 rqtn; + int err; + + if (inner && !rss->inner_ft_support) { + mlx5e_rss_warn(rss->mdev, + "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n", + tt); + return -EINVAL; + } + + tir_p = rss_get_tirp(rss, tt, inner); + if (*tir_p) + return -EINVAL; + + tir = kvzalloc(sizeof(*tir), GFP_KERNEL); + if (!tir) + return -ENOMEM; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) { + err = -ENOMEM; + goto free_tir; + } + + rqtn = mlx5e_rqt_get_rqtn(&rss->rqt); + mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn, + rqtn, rss->inner_ft_support); + mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); + rss_tt = mlx5e_rss_get_tt_config(rss, tt); + mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); + + err = mlx5e_tir_init(tir, builder, rss->mdev, true); + mlx5e_tir_builder_free(builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to create %sindirect TIR: err = %d, tt = %d\n", + inner ? "inner " : "", err, tt); + goto free_tir; + } + + *tir_p = tir; + return 0; + +free_tir: + kvfree(tir); + return err; +} + +static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_tir **tir_p; + struct mlx5e_tir *tir; + + tir_p = rss_get_tirp(rss, tt, inner); + if (!*tir_p) + return; + + tir = *tir_p; + mlx5e_tir_destroy(tir); + kvfree(tir); + *tir_p = NULL; +} + +static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, + bool inner) +{ + enum mlx5_traffic_types tt, max_tt; + int err; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner); + if (err) + goto err_destroy_tirs; + } + + return 0; + +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_rss_destroy_tir(rss, tt, inner); + return err; +} + +static void mlx5e_rss_destroy_tirs(struct mlx5e_rss *rss, bool inner) +{ + enum mlx5_traffic_types tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_rss_destroy_tir(rss, tt, inner); +} + +static int mlx5e_rss_update_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir *tir; + int err; + + tir = rss_get_tir(rss, tt, inner); + if (!tir) + return 0; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + rss_tt = mlx5e_rss_get_tt_config(rss, tt); + + mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); + err = mlx5e_tir_modify(tir, builder); + + mlx5e_tir_builder_free(builder); + return err; +} + +static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss) +{ + enum mlx5_traffic_types tt; + int err, retval; + + retval = 0; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rss_update_tir(rss, tt, false); + if (err) { + retval = retval ? : err; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + + if (!rss->inner_ft_support) + continue; + + err = mlx5e_rss_update_tir(rss, tt, true); + if (err) { + retval = retval ? : err; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + } + return retval; +} + +int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn) +{ + rss->mdev = mdev; + rss->inner_ft_support = inner_ft_support; + rss->drop_rqn = drop_rqn; + + mlx5e_rss_params_init(rss); + refcount_set(&rss->refcnt, 1); + + return mlx5e_rqt_init_direct(&rss->rqt, mdev, true, drop_rqn); +} + +int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn, + const struct mlx5e_packet_merge_param *init_pkt_merge_param) +{ + int err; + + err = mlx5e_rss_init_no_tirs(rss, mdev, inner_ft_support, drop_rqn); + if (err) + goto err_out; + + err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false); + if (err) + goto err_destroy_rqt; + + if (inner_ft_support) { + err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true); + if (err) + goto err_destroy_tirs; + } + + return 0; + +err_destroy_tirs: + mlx5e_rss_destroy_tirs(rss, false); +err_destroy_rqt: + mlx5e_rqt_destroy(&rss->rqt); +err_out: + return err; +} + +int mlx5e_rss_cleanup(struct mlx5e_rss *rss) +{ + if (!refcount_dec_if_one(&rss->refcnt)) + return -EBUSY; + + mlx5e_rss_destroy_tirs(rss, false); + + if (rss->inner_ft_support) + mlx5e_rss_destroy_tirs(rss, true); + + mlx5e_rqt_destroy(&rss->rqt); + + return 0; +} + +void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss) +{ + refcount_inc(&rss->refcnt); +} + +void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss) +{ + refcount_dec(&rss->refcnt); +} + +unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss) +{ + return refcount_read(&rss->refcnt); +} + +u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_tir *tir; + + WARN_ON(inner && !rss->inner_ft_support); + tir = rss_get_tir(rss, tt, inner); + WARN_ON(!tir); + + return mlx5e_tir_get_tirn(tir); +} + +/* Fill the "tirn" output parameter. + * Create the requested TIR if it's its first usage. + */ +int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, + bool inner, u32 *tirn) +{ + struct mlx5e_tir *tir; + + tir = rss_get_tir(rss, tt, inner); + if (!tir) { /* TIR doesn't exist, create one */ + int err; + + err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner); + if (err) + return err; + tir = rss_get_tir(rss, tt, inner); + } + + *tirn = mlx5e_tir_get_tirn(tir); + return 0; +} + +static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns) +{ + int err; + + err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir); + if (err) + mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&rss->rqt), err); + return err; +} + +void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns) +{ + rss->enabled = true; + mlx5e_rss_apply(rss, rqns, num_rqns); +} + +void mlx5e_rss_disable(struct mlx5e_rss *rss) +{ + int err; + + rss->enabled = false; + err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn); + if (err) + mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n", + mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err); +} + +int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, + struct mlx5e_packet_merge_param *pkt_merge_param) +{ + struct mlx5e_tir_builder *builder; + enum mlx5_traffic_types tt; + int err, final_err; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); + + final_err = 0; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_tir *tir; + + tir = rss_get_tir(rss, tt, false); + if (!tir) + goto inner_tir; + err = mlx5e_tir_modify(tir, builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(tir), tt, err); + if (!final_err) + final_err = err; + } + +inner_tir: + if (!rss->inner_ft_support) + continue; + + tir = rss_get_tir(rss, tt, true); + if (!tir) + continue; + err = mlx5e_tir_modify(tir, builder); + if (err) { + mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(tir), tt, err); + if (!final_err) + final_err = err; + } + } + + mlx5e_tir_builder_free(builder); + return final_err; +} + +int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc) +{ + unsigned int i; + + if (indir) + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir[i] = rss->indir.table[i]; + + if (key) + memcpy(key, rss->hash.toeplitz_hash_key, + sizeof(rss->hash.toeplitz_hash_key)); + + if (hfunc) + *hfunc = rss->hash.hfunc; + + return 0; +} + +int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, + const u8 *key, const u8 *hfunc, + u32 *rqns, unsigned int num_rqns) +{ + bool changed_indir = false; + bool changed_hash = false; + struct mlx5e_rss *old_rss; + int err = 0; + + old_rss = mlx5e_rss_alloc(); + if (!old_rss) + return -ENOMEM; + + *old_rss = *rss; + + if (hfunc && *hfunc != rss->hash.hfunc) { + switch (*hfunc) { + case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_TOP: + break; + default: + err = -EINVAL; + goto out; + } + changed_hash = true; + changed_indir = true; + rss->hash.hfunc = *hfunc; + } + + if (key) { + if (rss->hash.hfunc == ETH_RSS_HASH_TOP) + changed_hash = true; + memcpy(rss->hash.toeplitz_hash_key, key, + sizeof(rss->hash.toeplitz_hash_key)); + } + + if (indir) { + unsigned int i; + + changed_indir = true; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + rss->indir.table[i] = indir[i]; + } + + if (changed_indir && rss->enabled) { + err = mlx5e_rss_apply(rss, rqns, num_rqns); + if (err) { + *rss = *old_rss; + goto out; + } + } + + if (changed_hash) + mlx5e_rss_update_tirs(rss); + +out: + mlx5e_rss_free(old_rss); + return err; +} + +struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss) +{ + return rss->hash; +} + +u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt) +{ + return rss->rx_hash_fields[tt]; +} + +int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + u8 rx_hash_fields) +{ + u8 old_rx_hash_fields; + int err; + + old_rx_hash_fields = rss->rx_hash_fields[tt]; + + if (old_rx_hash_fields == rx_hash_fields) + return 0; + + rss->rx_hash_fields[tt] = rx_hash_fields; + + err = mlx5e_rss_update_tir(rss, tt, false); + if (err) { + rss->rx_hash_fields[tt] = old_rx_hash_fields; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n", + tt, err); + return err; + } + + if (!(rss->inner_ft_support)) + return 0; + + err = mlx5e_rss_update_tir(rss, tt, true); + if (err) { + /* Partial update happened. Try to revert - it may fail too, but + * there is nothing more we can do. + */ + rss->rx_hash_fields[tt] = old_rx_hash_fields; + mlx5e_rss_warn(rss->mdev, + "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + if (mlx5e_rss_update_tir(rss, tt, false)) + mlx5e_rss_warn(rss->mdev, + "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n", + tt); + } + + return err; +} + +void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch) +{ + mlx5e_rss_params_indir_init_uniform(&rss->indir, nch); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h new file mode 100644 index 0000000000..c6b2164163 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_EN_RSS_H__ +#define __MLX5_EN_RSS_H__ + +#include "rqt.h" +#include "tir.h" +#include "fs.h" + +struct mlx5e_rss_params_traffic_type +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt); + +struct mlx5e_rss; + +struct mlx5e_rss *mlx5e_rss_alloc(void); +void mlx5e_rss_free(struct mlx5e_rss *rss); +int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn, + const struct mlx5e_packet_merge_param *init_pkt_merge_param); +int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, + bool inner_ft_support, u32 drop_rqn); +int mlx5e_rss_cleanup(struct mlx5e_rss *rss); + +void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss); +void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss); +unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss); + +u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + bool inner); +int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, + enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, + bool inner, u32 *tirn); + +void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns); +void mlx5e_rss_disable(struct mlx5e_rss *rss); + +int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, + struct mlx5e_packet_merge_param *pkt_merge_param); +int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, + const u8 *key, const u8 *hfunc, + u32 *rqns, unsigned int num_rqns); +struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss); +u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt); +int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + u8 rx_hash_fields); +void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch); +#endif /* __MLX5_EN_RSS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c new file mode 100644 index 0000000000..56e6b8c750 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "rx_res.h" +#include "channels.h" +#include "params.h" + +#define MLX5E_MAX_NUM_RSS 16 + +struct mlx5e_rx_res { + struct mlx5_core_dev *mdev; + enum mlx5e_rx_res_features features; + unsigned int max_nch; + u32 drop_rqn; + + struct mlx5e_packet_merge_param pkt_merge_param; + struct rw_semaphore pkt_merge_param_sem; + + struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS]; + bool rss_active; + u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; + unsigned int rss_nch; + + struct { + struct mlx5e_rqt direct_rqt; + struct mlx5e_tir direct_tir; + } *channels; + + struct { + struct mlx5e_rqt rqt; + struct mlx5e_tir tir; + } ptp; +}; + +/* API for rx_res_rss_* */ + +static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, + unsigned int init_nch) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_rss *rss; + int err; + + if (WARN_ON(res->rss[0])) + return -EINVAL; + + rss = mlx5e_rss_alloc(); + if (!rss) + return -ENOMEM; + + err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn, + &res->pkt_merge_param); + if (err) + goto err_rss_free; + + mlx5e_rss_set_indir_uniform(rss, init_nch); + + res->rss[0] = rss; + + return 0; + +err_rss_free: + mlx5e_rss_free(rss); + return err; +} + +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_rss *rss; + int err, i; + + for (i = 1; i < MLX5E_MAX_NUM_RSS; i++) + if (!res->rss[i]) + break; + + if (i == MLX5E_MAX_NUM_RSS) + return -ENOSPC; + + rss = mlx5e_rss_alloc(); + if (!rss) + return -ENOMEM; + + err = mlx5e_rss_init_no_tirs(rss, res->mdev, inner_ft_support, res->drop_rqn); + if (err) + goto err_rss_free; + + mlx5e_rss_set_indir_uniform(rss, init_nch); + if (res->rss_active) + mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch); + + res->rss[i] = rss; + *rss_idx = i; + + return 0; + +err_rss_free: + mlx5e_rss_free(rss); + return err; +} + +static int __mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx) +{ + struct mlx5e_rss *rss = res->rss[rss_idx]; + int err; + + err = mlx5e_rss_cleanup(rss); + if (err) + return err; + + mlx5e_rss_free(rss); + res->rss[rss_idx] = NULL; + + return 0; +} + +int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -EINVAL; + + return __mlx5e_rx_res_rss_destroy(res, rss_idx); +} + +static void mlx5e_rx_res_rss_destroy_all(struct mlx5e_rx_res *res) +{ + int i; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; + int err; + + if (!rss) + continue; + + err = __mlx5e_rx_res_rss_destroy(res, i); + if (err) { + unsigned int refcount; + + refcount = mlx5e_rss_refcnt_read(rss); + mlx5_core_warn(res->mdev, + "Failed to destroy RSS context %d, refcount = %u, err = %d\n", + i, refcount, err); + } + } +} + +static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res) +{ + int i; + + res->rss_active = true; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; + + if (!rss) + continue; + mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch); + } +} + +static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res) +{ + int i; + + res->rss_active = false; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { + struct mlx5e_rss *rss = res->rss[i]; + + if (!rss) + continue; + mlx5e_rss_disable(rss); + } +} + +/* Updates the indirection table SW shadow, does not update the HW resources yet */ +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch) +{ + WARN_ON_ONCE(res->rss_active); + mlx5e_rss_set_indir_uniform(res->rss[0], nch); +} + +int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; + + return mlx5e_rss_get_rxfh(rss, indir, key, hfunc); +} + +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + const u32 *indir, const u8 *key, const u8 *hfunc) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; + + return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch); +} + +int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, + enum mlx5_traffic_types tt) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; + + return mlx5e_rss_get_hash_fields(rss, tt); +} + +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, + enum mlx5_traffic_types tt, u8 rx_hash_fields) +{ + struct mlx5e_rss *rss; + + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return -EINVAL; + + rss = res->rss[rss_idx]; + if (!rss) + return -ENOENT; + + return mlx5e_rss_set_hash_fields(rss, tt, rx_hash_fields); +} + +int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res) +{ + int i, cnt; + + cnt = 0; + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) + if (res->rss[i]) + cnt++; + + return cnt; +} + +int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss) +{ + int i; + + if (!rss) + return -EINVAL; + + for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) + if (rss == res->rss[i]) + return i; + + return -ENOENT; +} + +struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx) +{ + if (rss_idx >= MLX5E_MAX_NUM_RSS) + return NULL; + + return res->rss[rss_idx]; +} + +/* End of API rx_res_rss_* */ + +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); +} + +static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err = 0; + int ix; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL); + if (!res->channels) { + err = -ENOMEM; + goto out; + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt, + res->mdev, false, res->drop_rqn); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_rqts; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + inner_ft_support); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + goto out; + +err_destroy_direct_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + + ix = res->max_nch; +err_destroy_direct_rqts: + while (--ix >= 0) + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + + kvfree(res->channels); + +out: + mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn); + if (err) + goto out; + + /* Separated from the channels RQs, does not share pkt_merge state with them */ + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true); + if (err) + goto err_destroy_ptp_rqt; + + goto out; + +err_destroy_ptp_rqt: + mlx5e_rqt_destroy(&res->ptp.rqt); + +out: + mlx5e_tir_builder_free(builder); + return err; +} + +static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res) +{ + unsigned int ix; + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + } + + kvfree(res->channels); +} + +static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_tir_destroy(&res->ptp.tir); + mlx5e_rqt_destroy(&res->ptp.rqt); +} + +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param, + unsigned int init_nch) +{ + int err; + + res->mdev = mdev; + res->features = features; + res->max_nch = max_nch; + res->drop_rqn = drop_rqn; + + res->pkt_merge_param = *init_pkt_merge_param; + init_rwsem(&res->pkt_merge_param_sem); + + err = mlx5e_rx_res_rss_init_def(res, init_nch); + if (err) + goto err_out; + + err = mlx5e_rx_res_channels_init(res); + if (err) + goto err_rss_destroy; + + err = mlx5e_rx_res_ptp_init(res); + if (err) + goto err_channels_destroy; + + return 0; + +err_channels_destroy: + mlx5e_rx_res_channels_destroy(res); +err_rss_destroy: + __mlx5e_rx_res_rss_destroy(res, 0); +err_out: + return err; +} + +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_rx_res_ptp_destroy(res); + mlx5e_rx_res_channels_destroy(res); + mlx5e_rx_res_rss_destroy_all(res); +} + +void mlx5e_rx_res_free(struct mlx5e_rx_res *res) +{ + kvfree(res); +} + +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); +} + +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_get_tirn(rss, tt, false); +} + +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + struct mlx5e_rss *rss = res->rss[0]; + + return mlx5e_rss_get_tirn(rss, tt, true); +} + +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP)); + return mlx5e_tir_get_tirn(&res->ptp.tir); +} + +static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); +} + +static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res, + struct mlx5e_channels *chs, + unsigned int ix) +{ + u32 rqn = res->rss_rqns[ix]; + int err; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + rqn, ix, err); +} + +static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res, + unsigned int ix) +{ + int err; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); +} + +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs) +{ + unsigned int nch, ix; + int err; + + nch = mlx5e_channels_get_num(chs); + + for (ix = 0; ix < chs->num; ix++) { + if (mlx5e_channels_is_xsk(chs, ix)) + mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]); + else + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + } + res->rss_nch = chs->num; + + mlx5e_rx_res_rss_enable(res); + + for (ix = 0; ix < nch; ix++) + mlx5e_rx_res_channel_activate_direct(res, chs, ix); + for (ix = nch; ix < res->max_nch; ix++) + mlx5e_rx_res_channel_deactivate_direct(res, ix); + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + u32 rqn; + + if (!mlx5e_channels_get_ptp_rqn(chs, &rqn)) + rqn = res->drop_rqn; + + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + rqn, err); + } +} + +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) +{ + unsigned int ix; + int err; + + mlx5e_rx_res_rss_disable(res); + + for (ix = 0; ix < res->max_nch; ix++) + mlx5e_rx_res_channel_deactivate_direct(res, ix); + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + res->drop_rqn, err); + } +} + +void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix, bool xsk) +{ + if (xsk) + mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]); + else + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + + mlx5e_rx_res_rss_enable(res); + + mlx5e_rx_res_channel_activate_direct(res, chs, ix); +} + +int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, + struct mlx5e_packet_merge_param *pkt_merge_param) +{ + struct mlx5e_tir_builder *builder; + int err, final_err; + unsigned int ix; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + down_write(&res->pkt_merge_param_sem); + res->pkt_merge_param = *pkt_merge_param; + + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); + + final_err = 0; + + for (ix = 0; ix < MLX5E_MAX_NUM_RSS; ix++) { + struct mlx5e_rss *rss = res->rss[ix]; + + if (!rss) + continue; + + err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param); + if (err) + final_err = final_err ? : err; + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n", + mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err); + if (!final_err) + final_err = err; + } + } + + up_write(&res->pkt_merge_param_sem); + mlx5e_tir_builder_free(builder); + return final_err; +} + +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res) +{ + return mlx5e_rss_get_hash(res->rss[0]); +} + +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + u32 rqtn; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq); + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn, + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + mlx5e_tir_builder_build_tls(builder); + down_read(&res->pkt_merge_param_sem); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); + err = mlx5e_tir_init(tir, builder, res->mdev, false); + up_read(&res->pkt_merge_param_sem); + + mlx5e_tir_builder_free(builder); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h new file mode 100644 index 0000000000..580fe8bc3c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_RX_RES_H__ +#define __MLX5_EN_RX_RES_H__ + +#include +#include "rqt.h" +#include "tir.h" +#include "fs.h" +#include "rss.h" + +struct mlx5e_rx_res; + +struct mlx5e_channels; +struct mlx5e_rss_params_hash; + +enum mlx5e_rx_res_features { + MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), + MLX5E_RX_RES_FEATURE_PTP = BIT(1), +}; + +/* Setup */ +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void); +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param, + unsigned int init_nch); +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res); +void mlx5e_rx_res_free(struct mlx5e_rx_res *res); + +/* TIRN getters for flow steering */ +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); + +/* Activate/deactivate API */ +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); +void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix, bool xsk); + +/* Configuration API */ +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); +int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + const u32 *indir, const u8 *key, const u8 *hfunc); + +int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, + enum mlx5_traffic_types tt); +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, + enum mlx5_traffic_types tt, u8 rx_hash_fields); +int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, + struct mlx5e_packet_merge_param *pkt_merge_param); + +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch); +int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx); +int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res); +int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss); +struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx); + +/* Workaround for hairpin */ +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); + +/* Accel TIRs */ +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir); +#endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c new file mode 100644 index 0000000000..f675b19263 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "selq.h" +#include +#include +#include +#include "en.h" +#include "en/ptp.h" +#include "en/htb.h" + +struct mlx5e_selq_params { + unsigned int num_regular_queues; + unsigned int num_channels; + unsigned int num_tcs; + union { + u8 is_special_queues; + struct { + bool is_htb : 1; + bool is_ptp : 1; + }; + }; + u16 htb_maj_id; + u16 htb_defcls; +}; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock) +{ + struct mlx5e_selq_params *init_params; + + selq->state_lock = state_lock; + + selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL); + if (!selq->standby) + return -ENOMEM; + + init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL); + if (!init_params) { + kvfree(selq->standby); + selq->standby = NULL; + return -ENOMEM; + } + /* Assign dummy values, so that mlx5e_select_queue won't crash. */ + *init_params = (struct mlx5e_selq_params) { + .num_regular_queues = 1, + .num_channels = 1, + .num_tcs = 1, + .is_htb = false, + .is_ptp = false, + .htb_maj_id = 0, + .htb_defcls = 0, + }; + rcu_assign_pointer(selq->active, init_params); + + return 0; +} + +void mlx5e_selq_cleanup(struct mlx5e_selq *selq) +{ + WARN_ON_ONCE(selq->is_prepared); + + kvfree(selq->standby); + selq->standby = NULL; + selq->is_prepared = true; + + mlx5e_selq_apply(selq); + + kvfree(selq->standby); + selq->standby = NULL; +} + +void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params) +{ + struct mlx5e_selq_params *selq_active; + + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq_active = rcu_dereference_protected(selq->active, + lockdep_is_held(selq->state_lock)); + *selq->standby = *selq_active; + selq->standby->num_channels = params->num_channels; + selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params); + selq->standby->num_regular_queues = + selq->standby->num_channels * selq->standby->num_tcs; + selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS); +} + +bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *selq_active = + rcu_dereference_protected(selq->active, lockdep_is_held(selq->state_lock)); + + return selq_active->htb_maj_id; +} + +void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls) +{ + struct mlx5e_selq_params *selq_active; + + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(selq->is_prepared); + + selq->is_prepared = true; + + selq_active = rcu_dereference_protected(selq->active, + lockdep_is_held(selq->state_lock)); + *selq->standby = *selq_active; + selq->standby->is_htb = htb_maj_id; + selq->standby->htb_maj_id = htb_maj_id; + selq->standby->htb_defcls = htb_defcls; +} + +void mlx5e_selq_apply(struct mlx5e_selq *selq) +{ + struct mlx5e_selq_params *old_params; + + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; + + old_params = rcu_replace_pointer(selq->active, selq->standby, + lockdep_is_held(selq->state_lock)); + synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */ + selq->standby = old_params; +} + +void mlx5e_selq_cancel(struct mlx5e_selq *selq) +{ + lockdep_assert_held(selq->state_lock); + WARN_ON_ONCE(!selq->is_prepared); + + selq->is_prepared = false; +} + +#ifdef CONFIG_MLX5_CORE_EN_DCB +static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + +static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP) + return mlx5e_get_dscp_up(priv, skb); +#endif + if (skb_vlan_tag_present(skb)) + return skb_vlan_tag_get_prio(skb); + return 0; +} + +static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int up; + + up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0; + + return selq->num_regular_queues + up; +} + +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_selq_params *selq) +{ + u16 classid; + + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ + if ((TC_H_MAJ(skb->priority) >> 16) == selq->htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = selq->htb_defcls; + + if (!classid) + return 0; + + return mlx5e_htb_get_txq_by_classid(priv->htb, classid); +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_selq_params *selq; + int txq_ix, up; + + selq = rcu_dereference_bh(priv->selq.active); + + /* This is a workaround needed only for the mlx5e_netdev_change_profile + * flow that zeroes out the whole priv without unregistering the netdev + * and without preventing ndo_select_queue from being called. + */ + if (unlikely(!selq)) + return 0; + + if (likely(!selq->is_special_queues)) { + /* No special queues, netdev_pick_tx returns one of the regular ones. */ + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + /* Normalize any picked txq_ix to [0, num_channels), + * So we can return a txq_ix that matches the channel and + * packet UP. + */ + return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) + + up * selq->num_channels; + } + + if (unlikely(selq->htb_maj_id)) { + /* num_tcs == 1, shortcut for PTP */ + + txq_ix = mlx5e_select_htb_queue(priv, skb, selq); + if (txq_ix > 0) + return txq_ix; + + if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb))) + return selq->num_channels; + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. + * If they are selected, switch to regular queues. + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). + */ + return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels); + } + + /* PTP is enabled */ + + if (mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb, selq); + + txq_ix = netdev_pick_tx(dev, skb, NULL); + + /* Normalize any picked txq_ix to [0, num_channels). Queues in range + * [0, num_regular_queues) will be mapped to the corresponding channel + * index, so that we can apply the packet's UP (if num_tcs > 1). + * If netdev_pick_tx() picks ptp_channel, switch to a regular queue, + * because driver should select the PTP only at mlx5e_select_ptpsq(). + */ + txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels); + + if (selq->num_tcs <= 1) + return txq_ix; + + up = mlx5e_get_up(priv, skb); + + return txq_ix + up * selq->num_channels; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h new file mode 100644 index 0000000000..fd590f80e4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_SELQ_H__ +#define __MLX5_EN_SELQ_H__ + +#include + +struct mlx5e_selq_params; + +struct mlx5e_selq { + struct mlx5e_selq_params __rcu *active; + struct mlx5e_selq_params *standby; + struct mutex *state_lock; /* points to priv->state_lock */ + bool is_prepared; +}; + +struct mlx5e_params; +struct net_device; +struct sk_buff; + +int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock); +void mlx5e_selq_cleanup(struct mlx5e_selq *selq); +void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params); +void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls); +bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq); +void mlx5e_selq_apply(struct mlx5e_selq *selq); +void mlx5e_selq_cancel(struct mlx5e_selq *selq); + +static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels) +{ + while (unlikely(txq >= num_channels)) + txq -= num_channels; + return txq; +} + +static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels) +{ + if (unlikely(txq >= num_channels)) { + if (unlikely(txq >= num_channels << 3)) + txq %= num_channels; + else + do + txq -= num_channels; + while (txq >= num_channels); + } + return txq; +} + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); + +#endif /* __MLX5_EN_SELQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c new file mode 100644 index 0000000000..9db1b5307a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static int +tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_ACCEPT; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_accept = { + .parse_action = tc_act_parse_accept, + .is_terminating_action = true, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c new file mode 100644 index 0000000000..0380a04c36 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc/post_act.h" +#include "en/tc_priv.h" +#include "mlx5_core.h" + +static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = { + [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept, + [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, + [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap, + [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_redirect, + [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred, + [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress, + [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle, + [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap, + [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap, + [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum, + [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype, + [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample, + [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police, + [FLOW_ACTION_CT] = &mlx5e_tc_act_ct, + [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push, + [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop, + [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan, +}; + +static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = { + [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept, + [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, + [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic, + [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum, + [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark, + [FLOW_ACTION_CT] = &mlx5e_tc_act_ct, +}; + +/** + * mlx5e_tc_act_get() - Get an action parser for an action id. + * @act_id: Flow action id. + * @ns_type: flow namespace type. + */ +struct mlx5e_tc_act * +mlx5e_tc_act_get(enum flow_action_id act_id, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_tc_act **tc_acts; + + tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic; + + return tc_acts[act_id]; +} + +/** + * mlx5e_tc_act_init_parse_state() - Init a new parse_state. + * @parse_state: Parsing state. + * @flow: mlx5e tc flow being handled. + * @flow_action: flow action to parse. + * @extack: to set an error msg. + * + * The same parse_state should be passed to action parsers + * for tracking the current parsing state. + */ +void +mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_tc_flow *flow, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + memset(parse_state, 0, sizeof(*parse_state)); + parse_state->flow = flow; + parse_state->extack = extack; + parse_state->flow_action = flow_action; +} + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, int from, int to, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) +{ + struct flow_action_entry *act; + struct mlx5e_tc_act *tc_act; + struct mlx5e_priv *priv; + int err = 0, i; + + priv = parse_state->flow->priv; + + flow_action_for_each(i, act, flow_action) { + if (i < from) + continue; + else if (i > to) + break; + + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act || !tc_act->post_parse) + continue; + + err = tc_act->post_parse(parse_state, priv, attr); + if (err) + goto out; + } + +out: + return err; +} + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr) +{ + struct mlx5_core_dev *mdev = flow->priv->mdev; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + int err; + + mod_acts = &attr->parse_attr->mod_hdr_acts; + + /* Set handle on current post act rule to next post act rule. */ + err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts); + if (err) { + mlx5_core_warn(mdev, "Failed setting post action handle"); + return err; + } + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h new file mode 100644 index 0000000000..d6c12d0ea5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_H__ +#define __MLX5_EN_TC_ACT_H__ + +#include +#include +#include +#include "eswitch.h" +#include "pedit.h" + +struct mlx5_flow_attr; + +struct mlx5e_tc_act_parse_state { + struct flow_action *flow_action; + struct mlx5e_tc_flow *flow; + struct netlink_ext_ack *extack; + u32 actions; + bool encap; + bool decap; + bool mpls_push; + bool eth_push; + bool eth_pop; + bool ptype_host; + const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; + int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; + int if_count; + struct mlx5_tc_ct_priv *ct_priv; +}; + +struct mlx5e_tc_act_branch_ctrl { + enum flow_action_id act_id; + u32 extval; +}; + +struct mlx5e_tc_act { + bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr); + + int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr); + + int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr); + + bool (*is_multi_table_act)(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr); + + bool (*is_missable)(const struct flow_action_entry *act); + + int (*offload_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act, + struct flow_action_entry *act); + + int (*destroy_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act); + + int (*stats_action)(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act); + + bool (*get_branch_ctrl)(const struct flow_action_entry *act, + struct mlx5e_tc_act_branch_ctrl *cond_true, + struct mlx5e_tc_act_branch_ctrl *cond_false); + + bool is_terminating_action; +}; + +struct mlx5e_tc_flow_action { + unsigned int num_entries; + struct flow_action_entry **entries; +}; + +extern struct mlx5e_tc_act mlx5e_tc_act_drop; +extern struct mlx5e_tc_act mlx5e_tc_act_trap; +extern struct mlx5e_tc_act mlx5e_tc_act_accept; +extern struct mlx5e_tc_act mlx5e_tc_act_mark; +extern struct mlx5e_tc_act mlx5e_tc_act_goto; +extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap; +extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap; +extern struct mlx5e_tc_act mlx5e_tc_act_csum; +extern struct mlx5e_tc_act mlx5e_tc_act_pedit; +extern struct mlx5e_tc_act mlx5e_tc_act_vlan; +extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle; +extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push; +extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop; +extern struct mlx5e_tc_act mlx5e_tc_act_mirred; +extern struct mlx5e_tc_act mlx5e_tc_act_redirect; +extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic; +extern struct mlx5e_tc_act mlx5e_tc_act_ct; +extern struct mlx5e_tc_act mlx5e_tc_act_sample; +extern struct mlx5e_tc_act mlx5e_tc_act_ptype; +extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress; +extern struct mlx5e_tc_act mlx5e_tc_act_police; + +struct mlx5e_tc_act * +mlx5e_tc_act_get(enum flow_action_id act_id, + enum mlx5_flow_namespace_type ns_type); + +void +mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_tc_flow *flow, + struct flow_action *flow_action, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action, int from, int to, + struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type); + +int +mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5_flow_attr *next_attr); + +#endif /* __MLX5_EN_TC_ACT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c new file mode 100644 index 0000000000..c0f08ae6a5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include "act.h" +#include "en/tc_priv.h" + +static bool +csum_offload_supported(struct mlx5e_priv *priv, + u32 action, + u32 update_flags, + struct netlink_ext_ack *extack) +{ + u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | + TCA_CSUM_UPDATE_FLAG_UDP; + + /* The HW recalcs checksums only if re-writing headers */ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + NL_SET_ERR_MSG_MOD(extack, + "TC csum action is only offloaded with pedit"); + netdev_warn(priv->netdev, + "TC csum action is only offloaded with pedit\n"); + return false; + } + + if (update_flags & ~prot_flags) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload TC csum action for some header/s"); + netdev_warn(priv->netdev, + "can't offload TC csum action for some header/s - flags %#x\n", + update_flags); + return false; + } + + return true; +} + +static bool +tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow *flow = parse_state->flow; + + return csum_offload_supported(flow->priv, attr->action, + act->csum_flags, parse_state->extack); +} + +static int +tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_csum = { + .can_offload = tc_act_can_offload_csum, + .parse_action = tc_act_parse_csum, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c new file mode 100644 index 0000000000..feeb41693c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "en/tc_ct.h" + +static int +tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + int err; + + err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, act, parse_state->extack); + if (err) + return err; + + if (mlx5e_is_eswitch_flow(parse_state->flow)) { + attr->esw_attr->split_count = attr->esw_attr->out_count; + parse_state->if_count = 0; + } + + attr->flags |= MLX5_ATTR_FLAG_CT; + + return 0; +} + +static int +tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + if (!(attr->flags & MLX5_ATTR_FLAG_CT)) + return 0; + + return mlx5_tc_ct_flow_offload(parse_state->ct_priv, attr); +} + +static bool +tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + if (act->ct.action & TCA_CT_ACT_CLEAR) + return false; + + return true; +} + +static bool +tc_act_is_missable_ct(const struct flow_action_entry *act) +{ + return !(act->ct.action & TCA_CT_ACT_CLEAR); +} + +struct mlx5e_tc_act mlx5e_tc_act_ct = { + .parse_action = tc_act_parse_ct, + .post_parse = tc_act_post_parse_ct, + .is_multi_table_act = tc_act_is_multi_table_act_ct, + .is_missable = tc_act_is_missable_ct, +}; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c new file mode 100644 index 0000000000..5dc81715d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static int +tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_drop = { + .parse_action = tc_act_parse_drop, + .is_terminating_action = true, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c new file mode 100644 index 0000000000..0923e6db2d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "eswitch.h" + +static int +validate_goto_chain(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + bool is_esw = mlx5e_is_eswitch_flow(flow); + bool ft_flow = mlx5e_is_ft_flow(flow); + u32 dest_chain = act->chain_index; + struct mlx5_fs_chains *chains; + struct mlx5_eswitch *esw; + u32 reformat_and_fwd; + u32 max_chain; + + esw = priv->mdev->priv.eswitch; + chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc); + max_chain = mlx5_chains_get_chain_range(chains); + reformat_and_fwd = is_esw ? + MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); + + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_chains_backwards_supported(chains) && + dest_chain <= attr->chain) { + NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported"); + return -EOPNOTSUPP; + } + + if (dest_chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + + if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && + !reformat_and_fwd) { + NL_SET_ERR_MSG_MOD(extack, + "Goto chain is not allowed if action has reformat or decap"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool +tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + + if (validate_goto_chain(flow->priv, flow, attr, act, extack)) + return false; + + return true; +} + +static int +tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_chain = act->chain_index; + + return 0; +} + +static int +tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + + if (!attr->dest_chain) + return 0; + + if (parse_state->decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. + */ + + NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported"); + netdev_warn(priv->netdev, "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + + if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) { + NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_goto = { + .can_offload = tc_act_can_offload_goto, + .parse_action = tc_act_parse_goto, + .post_parse = tc_act_post_parse_goto, + .is_terminating_action = true, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c new file mode 100644 index 0000000000..e8d227595b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en_tc.h" + +static bool +tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) { + NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->nic_attr->flow_tag = act->mark; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mark = { + .can_offload = tc_act_can_offload_mark, + .parse_action = tc_act_parse_mark, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c new file mode 100644 index 0000000000..1b418095b7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include +#include +#include +#include "act.h" +#include "vlan.h" +#include "en/tc_tun_encap.h" +#include "en/tc_priv.h" +#include "en_rep.h" +#include "lag/lag.h" + +static bool +same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev) +{ + return mlx5e_eswitch_vf_rep(priv->netdev) && + priv->netdev == out_dev; +} + +static int +verify_uplink_forwarding(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rep_priv; + + /* Forwarding non encapsulated traffic between + * uplink ports is allowed only if + * termination_table_raw_traffic cap is set. + * + * Input vport was stored attr->in_rep. + * In LAG case, *priv* is the private data of + * uplink which may be not the input vport. + */ + rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep); + + if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && + mlx5e_eswitch_uplink_rep(out_dev))) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, + termination_table_raw_traffic)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are both uplink, can't offload forwarding"); + return -EOPNOTSUPP; + } else if (out_dev != rep_priv->netdev) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not the same uplink, can't offload forwarding"); + return -EOPNOTSUPP; + } + return 0; +} + +static bool +is_duplicated_output_device(struct net_device *dev, + struct net_device *out_dev, + int *ifindexes, int if_count, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < if_count; i++) { + if (ifindexes[i] == out_dev->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device"); + netdev_err(dev, "can't duplicate output to same device: %s\n", + out_dev->name); + return true; + } + } + + return false; +} + +static struct net_device * +get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev) +{ + struct net_device *fdb_out_dev = out_dev; + struct net_device *uplink_upper; + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + if (uplink_upper && netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) { + fdb_out_dev = uplink_dev; + } else if (netif_is_lag_master(out_dev)) { + fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); + if (fdb_out_dev && + (!mlx5e_eswitch_rep(fdb_out_dev) || + !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) + fdb_out_dev = NULL; + } + rcu_read_unlock(); + return fdb_out_dev; +} + +static bool +tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct net_device *out_dev = act->dev; + struct mlx5e_priv *priv = flow->priv; + struct mlx5_esw_flow_attr *esw_attr; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + + if (!out_dev) { + /* out_dev is NULL when filters with + * non-existing mirred device are replayed to + * the driver. + */ + return false; + } + + if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device"); + return false; + } + + if (parse_state->eth_pop && !parse_state->mpls_push) { + NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push"); + return false; + } + + if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push"); + return false; + } + + if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) { + /* Ignore forward to self rules generated + * by adding both mlx5 devs to the flow table + * block on a normal nft offload setup. + */ + return false; + } + + if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { + NL_SET_ERR_MSG_MOD(extack, + "can't support more output ports, can't offload forwarding"); + netdev_warn(priv->netdev, + "can't support more than %d output ports, can't offload forwarding\n", + esw_attr->out_count); + return false; + } + + if (parse_state->encap || + netdev_port_same_parent_id(priv->netdev, out_dev) || + netif_is_ovs_master(out_dev)) + return true; + + if (parse_attr->filter_dev != priv->netdev) { + /* All mlx5 devices are called to configure + * high level device filters. Therefore, the + * *attempt* to install a filter on invalid + * eswitch should not trigger an explicit error + */ + return false; + } + + NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); + + return false; +} + +static int +parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + + parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex; + parse_attr->tun_info[esw_attr->out_count] = + mlx5e_dup_tun_info(parse_state->tun_info); + + if (!parse_attr->tun_info[esw_attr->out_count]) + return -ENOMEM; + + parse_state->encap = false; + + if (parse_state->mpls_push) { + memcpy(&parse_attr->mpls_info[esw_attr->out_count], + &parse_state->mpls_info, sizeof(parse_state->mpls_info)); + parse_state->mpls_push = false; + } + esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; + esw_attr->out_count++; + /* attr->dests[].vport is resolved when we handle encap */ + + return 0; +} + +static int +parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct net_device *out_dev = act->dev; + struct net_device *uplink_dev; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch *esw; + int *ifindexes; + int if_count; + int err; + + esw = priv->mdev->priv.eswitch; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + ifindexes = parse_state->ifindexes; + if_count = parse_state->if_count; + + if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack)) + return -EOPNOTSUPP; + + parse_state->ifindexes[if_count] = out_dev->ifindex; + parse_state->if_count++; + + if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack)) + return -EOPNOTSUPP; + + if (netif_is_macvlan(out_dev)) + out_dev = macvlan_dev_real_dev(out_dev); + + out_dev = get_fdb_out_dev(uplink_dev, out_dev); + if (!out_dev) + return -ENODEV; + + if (is_vlan_dev(out_dev)) { + err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack); + if (err) + return err; + } + + if (is_vlan_dev(parse_attr->filter_dev)) { + err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack); + if (err) + return err; + } + + err = verify_uplink_forwarding(priv, attr, out_dev, extack); + if (err) + return err; + + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + return -EOPNOTSUPP; + } + + if (same_vf_reps(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself"); + return -EOPNOTSUPP; + } + + out_priv = netdev_priv(out_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[esw_attr->out_count].vport_valid = true; + esw_attr->dests[esw_attr->out_count].vport = rpriv->rep->vport; + esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; + + esw_attr->out_count++; + + return 0; +} + +static int +parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + int err; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_EGRESS, + &attr->action, esw_attr->out_count); + if (err) + return err; + + parse_state->if_count = 0; + esw_attr->out_count++; + return 0; +} + +static int +tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct net_device *out_dev = act->dev; + int err = -EOPNOTSUPP; + + if (parse_state->encap) + err = parse_mirred_encap(parse_state, act, attr); + else if (netdev_port_same_parent_id(priv->netdev, out_dev)) + err = parse_mirred(parse_state, act, priv, attr); + else if (netif_is_ovs_master(out_dev)) + err = parse_mirred_ovs_master(parse_state, act, priv, attr); + + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mirred = { + .can_offload = tc_act_can_offload_mirred, + .parse_action = tc_act_parse_mirred, + .is_terminating_action = false, +}; + +struct mlx5e_tc_act mlx5e_tc_act_redirect = { + .can_offload = tc_act_can_offload_mirred, + .parse_action = tc_act_parse_mirred, + .is_terminating_action = true, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c new file mode 100644 index 0000000000..7f409692b1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct net_device *out_dev = act->dev; + struct mlx5e_priv *priv = flow->priv; + + if (act->id != FLOW_ACTION_REDIRECT) + return false; + + if (priv->netdev->netdev_ops != out_dev->netdev_ops || + !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + netdev_warn(priv->netdev, + "devices %s %s not on same switch HW, can't offload forwarding\n", + netdev_name(priv->netdev), + out_dev->name); + return false; + } + + return true; +} + +static int +tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex; + flow_flag_set(parse_state->flow, HAIRPIN); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = { + .can_offload = tc_act_can_offload_mirred_nic, + .parse_action = tc_act_parse_mirred_nic, + .is_terminating_action = true, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c new file mode 100644 index 0000000000..f106190bf3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_priv *priv = parse_state->flow->priv; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) || + act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { + NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol"); + return false; + } + + return true; +} + +static void +copy_mpls_info(struct mlx5e_mpls_info *mpls_info, + const struct flow_action_entry *act) +{ + mpls_info->label = act->mpls_push.label; + mpls_info->tc = act->mpls_push.tc; + mpls_info->bos = act->mpls_push.bos; + mpls_info->ttl = act->mpls_push.ttl; +} + +static int +tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->mpls_push = true; + copy_mpls_info(&parse_state->mpls_info, act); + + return 0; +} + +static bool +tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct net_device *filter_dev; + + filter_dev = attr->parse_attr->filter_dev; + + /* we only support mpls pop if it is the first action + * or it is second action after tunnel key unset + * and the filter net device is bareudp. Subsequent + * actions can be pedit and the last can be mirred + * egress redirect. + */ + if ((act_index == 1 && !parse_state->decap) || act_index > 1) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap"); + return false; + } + + if (!netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices"); + return false; + } + + return true; +} + +static int +tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->esw_attr->eth.h_proto = act->mpls_pop.proto; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_flag_set(parse_state->flow, L3_TO_L2_DECAP); + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mpls_push = { + .can_offload = tc_act_can_offload_mpls_push, + .parse_action = tc_act_parse_mpls_push, +}; + +struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = { + .can_offload = tc_act_can_offload_mpls_pop, + .parse_action = tc_act_parse_mpls_pop, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c new file mode 100644 index 0000000000..368a95fa77 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include "act.h" +#include "pedit.h" +#include "en/tc_priv.h" +#include "en/mod_hdr.h" + +static int pedit_header_offsets[] = { + [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), +}; + +#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) + +static int +set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + u32 *curr_pmask, *curr_pval; + + curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); + + if (*curr_pmask & mask) { /* disallow acting twice on the same location */ + NL_SET_ERR_MSG_MOD(extack, + "curr_pmask and new mask same. Acting twice on same location"); + goto out_err; + } + + *curr_pmask |= mask; + *curr_pval |= (val & mask); + + return 0; + +out_err: + return -EOPNOTSUPP; +} + +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; + u8 htype = act->mangle.htype; + int err = -EOPNOTSUPP; + u32 mask, val, offset; + + if (htype == FLOW_ACT_MANGLE_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); + goto out_err; + } + + if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) { + NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported"); + goto out_err; + } + + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; + + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack); + if (err) + goto out_err; + + hdrs[cmd].pedits++; + + return 0; +out_err: + return err; +} + +static int +tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5e_tc_flow *flow = parse_state->flow; + enum mlx5_flow_namespace_type ns_type; + int err; + + ns_type = mlx5e_get_flow_namespace(flow); + + err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs, + parse_state->extack); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { + esw_attr->split_count = esw_attr->out_count; + parse_state->if_count = 0; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_pedit = { + .parse_action = tc_act_parse_pedit, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h new file mode 100644 index 0000000000..434c8bd710 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_PEDIT_H__ +#define __MLX5_EN_TC_ACT_PEDIT_H__ + +#include "en_tc.h" + +struct pedit_headers { + struct ethhdr eth; + struct vlan_hdr vlan; + struct iphdr ip4; + struct ipv6hdr ip6; + struct tcphdr tcp; + struct udphdr udp; +}; + +struct pedit_headers_action { + struct pedit_headers vals; + struct pedit_headers masks; + u32 pedits; +}; + +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c new file mode 100644 index 0000000000..1bd1c94fb9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "fs_core.h" + +static bool police_act_validate_control(enum flow_action_id act_id, + struct netlink_ext_ack *extack) +{ + if (act_id != FLOW_ACTION_PIPE && + act_id != FLOW_ACTION_ACCEPT && + act_id != FLOW_ACTION_JUMP && + act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform-exceed action is not pipe, ok, jump or drop"); + return false; + } + + return true; +} + +static int police_act_validate(const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (!police_act_validate_control(act->police.exceed.act_id, extack) || + !police_act_validate_control(act->police.notexceed.act_id, extack)) + return -EOPNOTSUPP; + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool +tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + int err; + + err = police_act_validate(act, parse_state->extack); + if (err) + return false; + + return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev); +} + +static int +fill_meter_params_from_act(const struct flow_action_entry *act, + struct mlx5e_flow_meter_params *params) +{ + params->index = act->hw_index; + if (act->police.rate_bytes_ps) { + params->mode = MLX5_RATE_LIMIT_BPS; + /* change rate to bits per second */ + params->rate = act->police.rate_bytes_ps << 3; + params->burst = act->police.burst; + } else if (act->police.rate_pkt_ps) { + params->mode = MLX5_RATE_LIMIT_PPS; + params->rate = act->police.rate_pkt_ps; + params->burst = act->police.burst_pkt; + } else if (act->police.mtu) { + params->mtu = act->police.mtu; + } else { + return -EOPNOTSUPP; + } + + return 0; +} + +static int +tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + enum mlx5_flow_namespace_type ns = mlx5e_get_flow_namespace(parse_state->flow); + struct mlx5e_flow_meter_params *params = &attr->meter_attr.params; + int err; + + err = fill_meter_params_from_act(act, params); + if (err) + return err; + + if (params->mtu) { + if (!(mlx5_fs_get_capabilities(priv->mdev, ns) & + MLX5_FLOW_STEERING_CAP_MATCH_RANGES)) + return -EOPNOTSUPP; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_MTU; + } else { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO; + attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER; + } + + return 0; +} + +static bool +tc_act_is_multi_table_act_police(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + return true; +} + +static int +tc_act_police_offload(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act, + struct flow_action_entry *act) +{ + struct mlx5e_flow_meter_params params = {}; + struct mlx5e_flow_meter_handle *meter; + int err = 0; + + err = police_act_validate(act, fl_act->extack); + if (err) + return err; + + err = fill_meter_params_from_act(act, ¶ms); + if (err) + return err; + + meter = mlx5e_tc_meter_get(priv->mdev, ¶ms); + if (IS_ERR(meter) && PTR_ERR(meter) == -ENOENT) { + meter = mlx5e_tc_meter_replace(priv->mdev, ¶ms); + } else if (!IS_ERR(meter)) { + err = mlx5e_tc_meter_update(meter, ¶ms); + mlx5e_tc_meter_put(meter); + } + + if (IS_ERR(meter)) { + NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter"); + mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index); + err = PTR_ERR(meter); + } + + return err; +} + +static int +tc_act_police_destroy(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act) +{ + struct mlx5e_flow_meter_params params = {}; + struct mlx5e_flow_meter_handle *meter; + + params.index = fl_act->index; + meter = mlx5e_tc_meter_get(priv->mdev, ¶ms); + if (IS_ERR(meter)) { + NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter"); + mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index); + return PTR_ERR(meter); + } + /* first put for the get and second for cleanup */ + mlx5e_tc_meter_put(meter); + mlx5e_tc_meter_put(meter); + return 0; +} + +static int +tc_act_police_stats(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act) +{ + struct mlx5e_flow_meter_params params = {}; + struct mlx5e_flow_meter_handle *meter; + u64 bytes, packets, drops, lastuse; + + params.index = fl_act->index; + meter = mlx5e_tc_meter_get(priv->mdev, ¶ms); + if (IS_ERR(meter)) { + NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter"); + return PTR_ERR(meter); + } + + mlx5e_tc_meter_get_stats(meter, &bytes, &packets, &drops, &lastuse); + flow_stats_update(&fl_act->stats, bytes, packets, drops, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); + mlx5e_tc_meter_put(meter); + return 0; +} + +static bool +tc_act_police_get_branch_ctrl(const struct flow_action_entry *act, + struct mlx5e_tc_act_branch_ctrl *cond_true, + struct mlx5e_tc_act_branch_ctrl *cond_false) +{ + cond_true->act_id = act->police.notexceed.act_id; + cond_true->extval = act->police.notexceed.extval; + + cond_false->act_id = act->police.exceed.act_id; + cond_false->extval = act->police.exceed.extval; + return true; +} + +struct mlx5e_tc_act mlx5e_tc_act_police = { + .can_offload = tc_act_can_offload_police, + .parse_action = tc_act_parse_police, + .is_multi_table_act = tc_act_is_multi_table_act_police, + .offload_action = tc_act_police_offload, + .destroy_action = tc_act_police_destroy, + .stats_action = tc_act_police_stats, + .get_branch_ctrl = tc_act_police_get_branch_ctrl, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c new file mode 100644 index 0000000000..80b4bc6438 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static int +tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + + if (act->ptype != PACKET_HOST) { + NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host"); + return -EOPNOTSUPP; + } + + parse_state->ptype_host = true; + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_ptype = { + .parse_action = tc_act_parse_ptype, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c new file mode 100644 index 0000000000..2d1d4a0450 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct net_device *out_dev = act->dev; + struct mlx5_esw_flow_attr *esw_attr; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + + if (!out_dev) + return false; + + if (!netif_is_ovs_master(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is supported only for OVS internal ports"); + return false; + } + + if (netif_is_ovs_master(parse_attr->filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is not supported from internal port"); + return false; + } + + if (!parse_state->ptype_host) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress requires ptype=host action"); + return false; + } + + if (esw_attr->out_count) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress is supported only as single destination"); + return false; + } + + return true; +} + +static int +tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + int err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_INGRESS, + &attr->action, esw_attr->out_count); + if (err) + return err; + + parse_state->if_count = 0; + esw_attr->out_count++; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = { + .can_offload = tc_act_can_offload_redirect_ingress, + .parse_action = tc_act_parse_redirect_ingress, +}; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c new file mode 100644 index 0000000000..2df02f99ce --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include "act.h" +#include "en/tc_priv.h" +#include "en/tc/act/sample.h" + +static int +tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_sample_attr *sample_attr = &attr->sample_attr; + + sample_attr->rate = act->sample.rate; + sample_attr->group_num = act->sample.psample_group->group_num; + + if (act->sample.truncate) + sample_attr->trunc_size = act->sample.trunc_size; + + attr->flags |= MLX5_ATTR_FLAG_SAMPLE; + flow_flag_set(parse_state->flow, SAMPLE); + + return 0; +} + +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr) +{ + if (MLX5_CAP_GEN(mdev, reg_c_preserve) || + attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + return true; + + return false; +} + +static bool +tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr); +} + +struct mlx5e_tc_act mlx5e_tc_act_sample = { + .parse_action = tc_act_parse_sample, + .is_multi_table_act = tc_act_is_multi_table_act_sample, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h new file mode 100644 index 0000000000..3efb3a15c5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__ +#define __MLX5_EN_TC_ACT_SAMPLE_H__ + +#include +#include "en/tc_priv.h" + +bool +mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev, + struct mlx5_flow_attr *attr); + +#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c new file mode 100644 index 0000000000..1b78bd9c10 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "eswitch.h" + +static int +tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_ft = mlx5_eswitch_get_slow_fdb(priv->mdev->priv.eswitch); + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_trap = { + .parse_action = tc_act_parse_trap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c new file mode 100644 index 0000000000..f1cae21c2c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_tun_encap.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + if (!act->tunnel) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Zero tunnel attributes is not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->tun_info = act->tunnel; + parse_state->encap = true; + + return 0; +} + +static int +tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->decap = true; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_tun_encap = { + .can_offload = tc_act_can_offload_tun_encap, + .parse_action = tc_act_parse_tun_encap, +}; + +struct mlx5e_tc_act mlx5e_tc_act_tun_decap = { + .parse_action = tc_act_parse_tun_decap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c new file mode 100644 index 0000000000..a13c5e707b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include "act.h" +#include "vlan.h" +#include "en/tc_priv.h" + +static int +add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack) +{ + const struct flow_action_entry prio_tag_act = { + .vlan.vid = 0, + .vlan.prio = + MLX5_GET(fte_match_set_lyr_2_4, + mlx5e_get_match_headers_value(*action, + &parse_attr->spec), + first_prio) & + MLX5_GET(fte_match_set_lyr_2_4, + mlx5e_get_match_headers_criteria(*action, + &parse_attr->spec), + first_prio), + }; + + return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, + &prio_tag_act, parse_attr, action, + extack); +} + +static int +parse_tc_vlan_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_esw_flow_attr *attr, + u32 *action, + struct netlink_ext_ack *extack, + struct mlx5e_tc_act_parse_state *parse_state) +{ + u8 vlan_idx = attr->total_vlan; + + if (vlan_idx >= MLX5_FS_VLAN_DEPTH) { + NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, vlan_idx)) { + NL_SET_ERR_MSG_MOD(extack, "firmware vlan actions is not supported"); + return -EOPNOTSUPP; + } + + switch (act->id) { + case FLOW_ACTION_VLAN_POP: + if (vlan_idx) + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; + else + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + break; + case FLOW_ACTION_VLAN_PUSH: + attr->vlan_vid[vlan_idx] = act->vlan.vid; + attr->vlan_prio[vlan_idx] = act->vlan.prio; + attr->vlan_proto[vlan_idx] = act->vlan.proto; + if (!attr->vlan_proto[vlan_idx]) + attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); + + if (vlan_idx) + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + else + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + break; + case FLOW_ACTION_VLAN_POP_ETH: + parse_state->eth_pop = true; + break; + case FLOW_ACTION_VLAN_PUSH_ETH: + if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP)) + return -EOPNOTSUPP; + parse_state->eth_push = true; + memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN); + memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN); + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); + return -EINVAL; + } + + attr->total_vlan = vlan_idx + 1; + + return 0; +} + +int +mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device **out_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *vlan_dev = *out_dev; + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_PUSH, + .vlan.vid = vlan_dev_vlan_id(vlan_dev), + .vlan.proto = vlan_dev_vlan_proto(vlan_dev), + .vlan.prio = 0, + }; + int err; + + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL); + if (err) + return err; + + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + + if (is_vlan_dev(*out_dev)) + err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack); + + return err; +} + +int +mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_POP, + }; + int nest_level, err = 0; + + nest_level = attr->parse_attr->filter_dev->lower_level - + priv->netdev->lower_level; + while (nest_level--) { + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, + extack, NULL); + if (err) + return err; + } + + return err; +} + +static int +tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int err; + + if (act->id == FLOW_ACTION_VLAN_PUSH && + (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { + /* Replace vlan pop+push with vlan modify */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act, + attr->parse_attr, &attr->action, + parse_state->extack); + } else { + err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action, + parse_state->extack, parse_state); + } + + if (err) + return err; + + esw_attr->split_count = esw_attr->out_count; + parse_state->if_count = 0; + + return 0; +} + +static int +tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + /* For prio tag mode, replace vlan pop with rewrite vlan prio + * tag rewrite. + */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, + &attr->action, extack); + if (err) + return err; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_vlan = { + .parse_action = tc_act_parse_vlan, + .post_parse = tc_act_post_parse_vlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h new file mode 100644 index 0000000000..2fa58c6f44 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_VLAN_H__ +#define __MLX5_EN_TC_ACT_VLAN_H__ + +#include +#include "en/tc_priv.h" + +struct pedit_headers_action; + +int +mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device **out_dev, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c new file mode 100644 index 0000000000..f17575b097 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include "act.h" +#include "vlan.h" +#include "en/tc_priv.h" + +struct pedit_headers_action; + +int +mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action, struct netlink_ext_ack *extack) +{ + u16 mask16 = VLAN_VID_MASK; + u16 val16 = act->vlan.vid & VLAN_VID_MASK; + const struct flow_action_entry pedit_act = { + .id = FLOW_ACTION_MANGLE, + .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, + .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), + .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), + .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), + }; + u8 match_prio_mask, match_prio_val; + void *headers_c, *headers_v; + int err; + + headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec); + headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec); + + if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && + MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { + NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match"); + return -EOPNOTSUPP; + } + + match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + if (act->vlan.prio != (match_prio_val & match_prio_mask)) { + NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported"); + return -EOPNOTSUPP; + } + + err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs, + extack); + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return err; +} + +static int +tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + enum mlx5_flow_namespace_type ns_type; + int err; + + ns_type = mlx5e_get_flow_namespace(parse_state->flow); + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr, + &attr->action, parse_state->extack); + if (err) + return err; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { + attr->esw_attr->split_count = attr->esw_attr->out_count; + parse_state->if_count = 0; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = { + .parse_action = tc_act_parse_vlan_mangle, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c new file mode 100644 index 0000000000..7aa926e542 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include +#include "en/tc_priv.h" +#include "act_stats.h" +#include "en/fs.h" + +struct mlx5e_tc_act_stats_handle { + struct rhashtable ht; + spinlock_t ht_lock; /* protects hashtable */ +}; + +struct mlx5e_tc_act_stats { + unsigned long tc_act_cookie; + + struct mlx5_fc *counter; + u64 lastpackets; + u64 lastbytes; + + struct rhash_head hash; + struct rcu_head rcu_head; +}; + +static const struct rhashtable_params act_counters_ht_params = { + .head_offset = offsetof(struct mlx5e_tc_act_stats, hash), + .key_offset = offsetof(struct mlx5e_tc_act_stats, tc_act_cookie), + .key_len = sizeof_field(struct mlx5e_tc_act_stats, tc_act_cookie), + .automatic_shrinking = true, +}; + +struct mlx5e_tc_act_stats_handle * +mlx5e_tc_act_stats_create(void) +{ + struct mlx5e_tc_act_stats_handle *handle; + int err; + + handle = kvzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return ERR_PTR(-ENOMEM); + + err = rhashtable_init(&handle->ht, &act_counters_ht_params); + if (err) + goto err; + + spin_lock_init(&handle->ht_lock); + return handle; +err: + kvfree(handle); + return ERR_PTR(err); +} + +void mlx5e_tc_act_stats_free(struct mlx5e_tc_act_stats_handle *handle) +{ + rhashtable_destroy(&handle->ht); + kvfree(handle); +} + +static int +mlx5e_tc_act_stats_add(struct mlx5e_tc_act_stats_handle *handle, + unsigned long act_cookie, + struct mlx5_fc *counter) +{ + struct mlx5e_tc_act_stats *act_stats, *old_act_stats; + struct rhashtable *ht = &handle->ht; + u64 lastused; + int err = 0; + + act_stats = kvzalloc(sizeof(*act_stats), GFP_KERNEL); + if (!act_stats) + return -ENOMEM; + + act_stats->tc_act_cookie = act_cookie; + act_stats->counter = counter; + + mlx5_fc_query_cached_raw(counter, + &act_stats->lastbytes, + &act_stats->lastpackets, &lastused); + + rcu_read_lock(); + old_act_stats = rhashtable_lookup_get_insert_fast(ht, + &act_stats->hash, + act_counters_ht_params); + if (IS_ERR(old_act_stats)) { + err = PTR_ERR(old_act_stats); + goto err_hash_insert; + } else if (old_act_stats) { + err = -EEXIST; + goto err_hash_insert; + } + rcu_read_unlock(); + + return 0; + +err_hash_insert: + rcu_read_unlock(); + kvfree(act_stats); + return err; +} + +void +mlx5e_tc_act_stats_del_flow(struct mlx5e_tc_act_stats_handle *handle, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_attr *attr; + struct mlx5e_tc_act_stats *act_stats; + int i; + + if (!flow_flag_test(flow, USE_ACT_STATS)) + return; + + list_for_each_entry(attr, &flow->attrs, list) { + for (i = 0; i < attr->tc_act_cookies_count; i++) { + struct rhashtable *ht = &handle->ht; + + spin_lock(&handle->ht_lock); + act_stats = rhashtable_lookup_fast(ht, + &attr->tc_act_cookies[i], + act_counters_ht_params); + if (act_stats && + rhashtable_remove_fast(ht, &act_stats->hash, + act_counters_ht_params) == 0) + kvfree_rcu(act_stats, rcu_head); + + spin_unlock(&handle->ht_lock); + } + } +} + +int +mlx5e_tc_act_stats_add_flow(struct mlx5e_tc_act_stats_handle *handle, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_fc *curr_counter = NULL; + unsigned long last_cookie = 0; + struct mlx5_flow_attr *attr; + int err; + int i; + + if (!flow_flag_test(flow, USE_ACT_STATS)) + return 0; + + list_for_each_entry(attr, &flow->attrs, list) { + if (attr->counter) + curr_counter = attr->counter; + + for (i = 0; i < attr->tc_act_cookies_count; i++) { + /* jump over identical ids (e.g. pedit)*/ + if (last_cookie == attr->tc_act_cookies[i]) + continue; + + err = mlx5e_tc_act_stats_add(handle, attr->tc_act_cookies[i], curr_counter); + if (err) + goto out_err; + last_cookie = attr->tc_act_cookies[i]; + } + } + + return 0; +out_err: + mlx5e_tc_act_stats_del_flow(handle, flow); + return err; +} + +int +mlx5e_tc_act_stats_fill_stats(struct mlx5e_tc_act_stats_handle *handle, + struct flow_offload_action *fl_act) +{ + struct rhashtable *ht = &handle->ht; + struct mlx5e_tc_act_stats *item; + u64 pkts, bytes, lastused; + int err = 0; + + rcu_read_lock(); + item = rhashtable_lookup(ht, &fl_act->cookie, act_counters_ht_params); + if (!item) { + rcu_read_unlock(); + err = -ENOENT; + goto err_out; + } + + mlx5_fc_query_cached_raw(item->counter, + &bytes, &pkts, &lastused); + + flow_stats_update(&fl_act->stats, + bytes - item->lastbytes, + pkts - item->lastpackets, + 0, lastused, FLOW_ACTION_HW_STATS_DELAYED); + + item->lastpackets = pkts; + item->lastbytes = bytes; + rcu_read_unlock(); + + return 0; + +err_out: + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.h new file mode 100644 index 0000000000..002292c256 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_ACT_STATS_H__ +#define __MLX5_EN_ACT_STATS_H__ + +#include +#include "en/tc_priv.h" + +struct mlx5e_tc_act_stats_handle; + +struct mlx5e_tc_act_stats_handle *mlx5e_tc_act_stats_create(void); +void mlx5e_tc_act_stats_free(struct mlx5e_tc_act_stats_handle *handle); + +int +mlx5e_tc_act_stats_add_flow(struct mlx5e_tc_act_stats_handle *handle, + struct mlx5e_tc_flow *flow); + +void +mlx5e_tc_act_stats_del_flow(struct mlx5e_tc_act_stats_handle *handle, + struct mlx5e_tc_flow *flow); + +int +mlx5e_tc_act_stats_fill_stats(struct mlx5e_tc_act_stats_handle *handle, + struct flow_offload_action *fl_act); + +#endif /* __MLX5_EN_ACT_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h new file mode 100644 index 0000000000..bb6b1a979b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_EN_TC_CT_FS_H__ +#define __MLX5_EN_TC_CT_FS_H__ + +struct mlx5_ct_fs { + const struct net_device *netdev; + struct mlx5_core_dev *dev; + + /* private data */ + void *priv_data[]; +}; + +struct mlx5_ct_fs_rule { +}; + +struct mlx5_ct_fs_ops { + int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct); + void (*destroy)(struct mlx5_ct_fs *fs); + + struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct flow_rule *flow_rule); + void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule); + + size_t priv_size; +}; + +static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs) +{ + return &fs->priv_data; +} + +struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void); + +#if IS_ENABLED(CONFIG_MLX5_SW_STEERING) +struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void); +#else +static inline struct mlx5_ct_fs_ops * +mlx5_ct_fs_smfs_ops_get(void) +{ + return NULL; +} +#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */ + +#endif /* __MLX5_EN_TC_CT_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c new file mode 100644 index 0000000000..ae4f55be48 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include "en_tc.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +#define ct_dbg(fmt, args...)\ + netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args) + +struct mlx5_ct_fs_dmfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; +}; + +static int +mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + return 0; +} + +static void +mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs) +{ +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5e_priv *priv = netdev_priv(fs->netdev); + struct mlx5_ct_fs_dmfs_rule *dmfs_rule; + int err; + + dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL); + if (!dmfs_rule) + return ERR_PTR(-ENOMEM); + + dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); + if (IS_ERR(dmfs_rule->rule)) { + err = PTR_ERR(dmfs_rule->rule); + ct_dbg("Failed to add ct entry fs rule"); + goto err_insert; + } + + dmfs_rule->attr = attr; + + return &dmfs_rule->fs_rule; + +err_insert: + kfree(dmfs_rule); + return ERR_PTR(err); +} + +static void +mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_dmfs_rule, + fs_rule); + + mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr); + kfree(dmfs_rule); +} + +static struct mlx5_ct_fs_ops dmfs_ops = { + .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del, + + .init = mlx5_ct_fs_dmfs_init, + .destroy = mlx5_ct_fs_dmfs_destroy, +}; + +struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void) +{ + return &dmfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c new file mode 100644 index 0000000000..8c531f4ec9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include + +#include "en_tc.h" +#include "en/tc_priv.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +#include "lib/smfs.h" + +#define INIT_ERR_PREFIX "ct_fs_smfs init failed" +#define ct_dbg(fmt, args...)\ + netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args) +#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16) + +struct mlx5_ct_fs_smfs_matcher { + struct mlx5dr_matcher *dr_matcher; + struct list_head list; + int prio; + refcount_t ref; +}; + +struct mlx5_ct_fs_smfs_matchers { + struct mlx5_ct_fs_smfs_matcher smfs_matchers[6]; + struct list_head used; +}; + +struct mlx5_ct_fs_smfs { + struct mlx5dr_table *ct_tbl, *ct_nat_tbl; + struct mlx5_ct_fs_smfs_matchers matchers; + struct mlx5_ct_fs_smfs_matchers matchers_nat; + struct mlx5dr_action *fwd_action; + struct mlx5_flow_table *ct_nat; + struct mutex lock; /* Guards matchers */ +}; + +struct mlx5_ct_fs_smfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5dr_rule *rule; + struct mlx5dr_action *count_action; + struct mlx5_ct_fs_smfs_matcher *smfs_matcher; +}; + +static inline void +mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp, + bool gre) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + + if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version))) + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); + else + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + if (likely(ipv4)) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xFF, + MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xFF, + MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4, + src_ipv4_src_ipv6.ipv6_layout.ipv6)); + } + + if (likely(tcp)) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(MLX5_CT_TCP_FLAGS_MASK)); + } else if (!gre) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport); + } + + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK); +} + +static struct mlx5dr_matcher * +mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4, + bool tcp, bool gre, u32 priority) +{ + struct mlx5dr_matcher *dr_matcher; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS; + + dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec); + kvfree(spec); + if (!dr_matcher) + return ERR_PTR(-EINVAL); + + return dr_matcher; +} + +static struct mlx5_ct_fs_smfs_matcher * +mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher; + struct mlx5_ct_fs_smfs_matchers *matchers; + struct mlx5dr_matcher *dr_matcher; + struct mlx5dr_table *tbl; + struct list_head *prev; + int prio; + + matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers; + smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre]; + + if (refcount_inc_not_zero(&smfs_matcher->ref)) + return smfs_matcher; + + mutex_lock(&fs_smfs->lock); + + /* Retry with lock, as another thread might have already created the relevant matcher + * till we acquired the lock + */ + if (refcount_inc_not_zero(&smfs_matcher->ref)) + goto out_unlock; + + // Find next available priority in sorted used list + prio = 0; + prev = &matchers->used; + list_for_each_entry(m, &matchers->used, list) { + prev = &m->list; + + if (m->prio == prio) + prio = m->prio + 1; + else + break; + } + + tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl; + dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio); + if (IS_ERR(dr_matcher)) { + netdev_warn(fs->netdev, + "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n", + nat, ipv4, tcp, gre, PTR_ERR(dr_matcher)); + + smfs_matcher = ERR_CAST(dr_matcher); + goto out_unlock; + } + + smfs_matcher->dr_matcher = dr_matcher; + smfs_matcher->prio = prio; + list_add(&smfs_matcher->list, prev); + refcount_set(&smfs_matcher->ref, 1); + +out_unlock: + mutex_unlock(&fs_smfs->lock); + return smfs_matcher; +} + +static void +mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock)) + return; + + mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher); + list_del(&smfs_matcher->list); + mutex_unlock(&fs_smfs->lock); +} + +static int +mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl; + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct); + ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat); + ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct); + fs_smfs->ct_nat = ct_nat; + + if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) { + netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables"); + return -EOPNOTSUPP; + } + + ct_dbg("using smfs steering"); + + fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl); + if (!fs_smfs->fwd_action) { + return -EINVAL; + } + + fs_smfs->ct_tbl = ct_tbl; + fs_smfs->ct_nat_tbl = ct_nat_tbl; + mutex_init(&fs_smfs->lock); + INIT_LIST_HEAD(&fs_smfs->matchers.used); + INIT_LIST_HEAD(&fs_smfs->matchers_nat.used); + + return 0; +} + +static void +mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + + mlx5_smfs_action_destroy(fs_smfs->fwd_action); +} + +static inline bool +mlx5_tc_ct_valid_used_dissector_keys(const u64 used_keys) +{ +#define DISS_BIT(name) BIT_ULL(FLOW_DISSECTOR_KEY_ ## name) + const u64 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | + DISS_BIT(META); + const u64 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | + DISS_BIT(PORTS) | DISS_BIT(TCP); + const u64 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | + DISS_BIT(PORTS) | DISS_BIT(TCP); + const u64 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | + DISS_BIT(PORTS); + const u64 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | + DISS_BIT(PORTS); + const u64 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); + const u64 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); + + return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || + used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); +} + +static bool +mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule) +{ + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; + struct flow_match_control control; + struct flow_match_basic basic; + struct flow_match_ports ports; + struct flow_match_tcp tcp; + + if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { + ct_dbg("rule uses unexpected dissectors (0x%016llx)", + flow_rule->match.dissector->used_keys); + return false; + } + + flow_rule_match_basic(flow_rule, &basic); + flow_rule_match_control(flow_rule, &control); + flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); + flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); + if (basic.key->ip_proto != IPPROTO_GRE) + flow_rule_match_ports(flow_rule, &ports); + if (basic.key->ip_proto == IPPROTO_TCP) + flow_rule_match_tcp(flow_rule, &tcp); + + if (basic.mask->n_proto != htons(0xFFFF) || + (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || + basic.mask->ip_proto != 0xFF || + (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && + basic.key->ip_proto != IPPROTO_GRE)) { + ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", + ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), + basic.key->ip_proto, basic.mask->ip_proto); + return false; + } + + if (basic.key->ip_proto != IPPROTO_GRE && + (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { + ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)", + ports.mask->src, ports.mask->dst); + return false; + } + + if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { + ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags); + return false; + } + + return true; +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs); + struct mlx5_ct_fs_smfs_matcher *smfs_matcher; + struct mlx5_ct_fs_smfs_rule *smfs_rule; + struct mlx5dr_action *actions[5]; + struct mlx5dr_rule *rule; + int num_actions = 0, err; + bool nat, tcp, ipv4, gre; + + if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule)) + return ERR_PTR(-EOPNOTSUPP); + + smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL); + if (!smfs_rule) + return ERR_PTR(-ENOMEM); + + smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter)); + if (!smfs_rule->count_action) { + err = -EINVAL; + goto err_count; + } + + actions[num_actions++] = smfs_rule->count_action; + actions[num_actions++] = attr->modify_hdr->action.dr_action; + actions[num_actions++] = fs_smfs->fwd_action; + + nat = (attr->ft == fs_smfs->ct_nat); + ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4; + tcp = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_TCP; + gre = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_GRE; + + smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre); + if (IS_ERR(smfs_matcher)) { + err = PTR_ERR(smfs_matcher); + goto err_matcher; + } + + rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions, + spec->flow_context.flow_source); + if (!rule) { + err = -EINVAL; + goto err_create; + } + + smfs_rule->rule = rule; + smfs_rule->smfs_matcher = smfs_matcher; + + return &smfs_rule->fs_rule; + +err_create: + mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher); +err_matcher: + mlx5_smfs_action_destroy(smfs_rule->count_action); +err_count: + kfree(smfs_rule); + return ERR_PTR(err); +} + +static void +mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_smfs_rule, + fs_rule); + + mlx5_smfs_rule_destroy(smfs_rule->rule); + mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher); + mlx5_smfs_action_destroy(smfs_rule->count_action); + kfree(smfs_rule); +} + +static struct mlx5_ct_fs_ops fs_smfs_ops = { + .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del, + + .init = mlx5_ct_fs_smfs_init, + .destroy = mlx5_ct_fs_smfs_destroy, + + .priv_size = sizeof(struct mlx5_ct_fs_smfs), +}; + +struct mlx5_ct_fs_ops * +mlx5_ct_fs_smfs_ops_get(void) +{ + return &fs_smfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c new file mode 100644 index 0000000000..8afcec0c5d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include "en/mapping.h" +#include "en/tc/int_port.h" +#include "en.h" +#include "en_rep.h" +#include "en_tc.h" + +struct mlx5e_tc_int_port { + enum mlx5e_tc_int_port_type type; + int ifindex; + u32 match_metadata; + u32 mapping; + struct list_head list; + struct mlx5_flow_handle *rx_rule; + refcount_t refcnt; + struct rcu_head rcu_head; +}; + +struct mlx5e_tc_int_port_priv { + struct mlx5_core_dev *dev; + struct mutex int_ports_lock; /* Protects int ports list */ + struct list_head int_ports; /* Uses int_ports_lock */ + u16 num_ports; + bool ul_rep_rx_ready; /* Set when uplink is performing teardown */ + struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */ +}; + +bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw) +{ + return mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_GEN(esw->dev, reg_c_preserve); +} + +u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port) +{ + return int_port->match_metadata; +} + +int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port) +{ + /* For egress forwarding we can have the case + * where the packet came from a vport and redirected + * to int port or it came from the uplink, going + * via internal port and hairpinned back to uplink + * so we set the source to any port in this case. + */ + return int_port->type == MLX5E_TC_INT_PORT_EGRESS ? + MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT : + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + +u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port) +{ + return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS); +} + +static struct mlx5_flow_handle * +mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw, + struct mlx5e_tc_int_port *int_port, + struct mlx5_flow_destination *dest) + +{ + struct mlx5_flow_context *flow_context; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5e_tc_int_port_get_metadata_for_match(int_port)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + /* Overwrite flow tag with the int port metadata mapping + * instead of the chain mapping. + */ + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = int_port->mapping; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, + &flow_act, dest, 1); + if (IS_ERR(flow_rule)) + mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n", + PTR_ERR(flow_rule)); + + kvfree(spec); + + return flow_rule; +} + +static struct mlx5e_tc_int_port * +mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5e_tc_int_port *int_port; + + if (!priv->ul_rep_rx_ready) + goto not_found; + + list_for_each_entry(int_port, &priv->int_ports, list) + if (int_port->ifindex == ifindex && int_port->type == type) { + refcount_inc(&int_port->refcnt); + return int_port; + } + +not_found: + return NULL; +} + +static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv, + int ifindex, enum mlx5e_tc_int_port_type type, + u32 *id) +{ + u32 mapped_key[2] = {type, ifindex}; + int err; + + err = mapping_add(priv->metadata_mapping, mapped_key, id); + if (err) + return err; + + /* Fill upper 4 bits of PFNUM with reserved value */ + *id |= 0xf << ESW_VPORT_BITS; + + return 0; +} + +static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv, + u32 id) +{ + id &= (1 << ESW_VPORT_BITS) - 1; + mapping_remove(priv->metadata_mapping, id); +} + +/* Must be called with priv->int_ports_lock held */ +static struct mlx5e_tc_int_port * +mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5_eswitch *esw = priv->dev->priv.eswitch; + struct mlx5_mapped_obj mapped_obj = {}; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_int_port *int_port; + struct mlx5_flow_destination dest; + struct mapping_ctx *ctx; + u32 match_metadata; + u32 mapping; + int err; + + if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) { + mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d", + MLX5E_TC_MAX_INT_PORT_NUM); + return ERR_PTR(-ENOSPC); + } + + int_port = kzalloc(sizeof(*int_port), GFP_KERNEL); + if (!int_port) + return ERR_PTR(-ENOMEM); + + err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata); + if (err) { + mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d", + ifindex); + goto err_metadata; + } + + /* map metadata to reg_c0 object for miss handling */ + ctx = esw->offloads.reg_c0_obj_pool; + mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA; + mapped_obj.int_port_metadata = match_metadata; + err = mapping_add(ctx, &mapped_obj, &mapping); + if (err) + goto err_map; + + int_port->type = type; + int_port->ifindex = ifindex; + int_port->match_metadata = match_metadata; + int_port->mapping = mapping; + + /* Create a match on internal vport metadata in vport table */ + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = uplink_rpriv->root_ft; + + int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest); + if (IS_ERR(int_port->rx_rule)) { + err = PTR_ERR(int_port->rx_rule); + mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err); + goto err_rx_rule; + } + + refcount_set(&int_port->refcnt, 1); + list_add_rcu(&int_port->list, &priv->int_ports); + priv->num_ports++; + + return int_port; + +err_rx_rule: + mapping_remove(ctx, int_port->mapping); + +err_map: + mlx5e_int_port_metadata_free(priv, match_metadata); + +err_metadata: + kfree(int_port); + + return ERR_PTR(err); +} + +/* Must be called with priv->int_ports_lock held */ +static void +mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port) +{ + struct mlx5_eswitch *esw = priv->dev->priv.eswitch; + struct mapping_ctx *ctx; + + ctx = esw->offloads.reg_c0_obj_pool; + + list_del_rcu(&int_port->list); + + /* The following parameters are not used by the + * rcu readers of this int_port object so it is + * safe to release them. + */ + if (int_port->rx_rule) + mlx5_del_flow_rules(int_port->rx_rule); + mapping_remove(ctx, int_port->mapping); + mlx5e_int_port_metadata_free(priv, int_port->match_metadata); + kfree_rcu_mightsleep(int_port); + priv->num_ports--; +} + +/* Must be called with rcu_read_lock held */ +static struct mlx5e_tc_int_port * +mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv, + u32 metadata) +{ + struct mlx5e_tc_int_port *int_port; + + list_for_each_entry_rcu(int_port, &priv->int_ports, list) + if (int_port->match_metadata == metadata) + return int_port; + + return NULL; +} + +struct mlx5e_tc_int_port * +mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type) +{ + struct mlx5e_tc_int_port *int_port; + + if (!priv) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&priv->int_ports_lock); + + /* Reject request if ul rep not ready */ + if (!priv->ul_rep_rx_ready) { + int_port = ERR_PTR(-EOPNOTSUPP); + goto done; + } + + int_port = mlx5e_int_port_lookup(priv, ifindex, type); + if (int_port) + goto done; + + /* Alloc and add new int port to list */ + int_port = mlx5e_int_port_add(priv, ifindex, type); + +done: + mutex_unlock(&priv->int_ports_lock); + + return int_port; +} + +void +mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port) +{ + if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock)) + return; + + mlx5e_int_port_remove(priv, int_port); + mutex_unlock(&priv->int_ports_lock); +} + +struct mlx5e_tc_int_port_priv * +mlx5e_tc_int_port_init(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_int_port_priv *int_port_priv; + u64 mapping_id; + + if (!mlx5e_tc_int_port_supported(esw)) + return NULL; + + int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL); + if (!int_port_priv) + return NULL; + + mapping_id = mlx5_query_nic_system_image_guid(priv->mdev); + + int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT, + sizeof(u32) * 2, + (1 << ESW_VPORT_BITS) - 1, true); + if (IS_ERR(int_port_priv->metadata_mapping)) { + mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n", + PTR_ERR(int_port_priv->metadata_mapping)); + goto err_mapping; + } + + int_port_priv->dev = priv->mdev; + mutex_init(&int_port_priv->int_ports_lock); + INIT_LIST_HEAD(&int_port_priv->int_ports); + + return int_port_priv; + +err_mapping: + kfree(int_port_priv); + + return NULL; +} + +void +mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv) +{ + if (!priv) + return; + + mutex_destroy(&priv->int_ports_lock); + mapping_destroy(priv->metadata_mapping); + kfree(priv); +} + +/* Int port rx rules reside in ul rep rx tables. + * It is possible the ul rep will go down while there are + * still int port rules in its rx table so proper cleanup + * is required to free resources. + */ +void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_tc_int_port_priv *ppriv; + struct mlx5e_rep_priv *uplink_rpriv; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + ppriv = uplink_priv->int_port_priv; + + if (!ppriv) + return; + + mutex_lock(&ppriv->int_ports_lock); + ppriv->ul_rep_rx_ready = true; + mutex_unlock(&ppriv->int_ports_lock); +} + +void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_tc_int_port_priv *ppriv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_int_port *int_port; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + ppriv = uplink_priv->int_port_priv; + + if (!ppriv) + return; + + mutex_lock(&ppriv->int_ports_lock); + + ppriv->ul_rep_rx_ready = false; + + list_for_each_entry(int_port, &ppriv->int_ports, list) { + if (!IS_ERR_OR_NULL(int_port->rx_rule)) + mlx5_del_flow_rules(int_port->rx_rule); + + int_port->rx_rule = NULL; + } + + mutex_unlock(&ppriv->int_ports_lock); +} + +bool +mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv, + struct sk_buff *skb, u32 int_vport_metadata, + bool *forward_tx) +{ + enum mlx5e_tc_int_port_type fwd_type; + struct mlx5e_tc_int_port *int_port; + struct net_device *dev; + int ifindex; + + if (!priv) + return false; + + rcu_read_lock(); + int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata); + if (!int_port) { + rcu_read_unlock(); + mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n", + int_vport_metadata); + return false; + } + + ifindex = int_port->ifindex; + fwd_type = int_port->type; + rcu_read_unlock(); + + dev = dev_get_by_index(&init_net, ifindex); + if (!dev) { + mlx5_core_dbg(priv->dev, + "Couldn't find internal port device with ifindex: %d\n", + ifindex); + return false; + } + + skb->skb_iif = dev->ifindex; + skb->dev = dev; + + if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) { + skb->pkt_type = PACKET_HOST; + skb_set_redirected(skb, true); + *forward_tx = false; + } else { + skb_reset_network_header(skb); + skb_push_rcsum(skb, skb->mac_len); + skb_set_redirected(skb, false); + *forward_tx = true; + } + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h new file mode 100644 index 0000000000..e72c79d308 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_INT_PORT_H__ +#define __MLX5_EN_TC_INT_PORT_H__ + +#include "en.h" + +struct mlx5e_tc_int_port; +struct mlx5e_tc_int_port_priv; + +enum mlx5e_tc_int_port_type { + MLX5E_TC_INT_PORT_INGRESS, + MLX5E_TC_INT_PORT_EGRESS, +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw); + +struct mlx5e_tc_int_port_priv * +mlx5e_tc_int_port_init(struct mlx5e_priv *priv); +void +mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv); + +void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv); +void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv); + +bool +mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv, + struct sk_buff *skb, u32 int_vport_metadata, + bool *forward_tx); +struct mlx5e_tc_int_port * +mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv, + int ifindex, + enum mlx5e_tc_int_port_type type); +void +mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv, + struct mlx5e_tc_int_port *int_port); + +u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port); +u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port); +int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline u32 +mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port) +{ + return 0; +} + +static inline int +mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port) +{ + return 0; +} + +static inline bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw) +{ + return false; +} + +static inline void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) {} +static inline void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ +#endif /* __MLX5_EN_TC_INT_PORT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c new file mode 100644 index 0000000000..8218c892b1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include "lib/aso.h" +#include "en/tc/post_act.h" +#include "meter.h" +#include "en/tc_priv.h" + +#define MLX5_START_COLOR_SHIFT 28 +#define MLX5_METER_MODE_SHIFT 24 +#define MLX5_CBS_EXP_SHIFT 24 +#define MLX5_CBS_MAN_SHIFT 16 +#define MLX5_CIR_EXP_SHIFT 8 + +/* cir = 8*(10^9)*cir_mantissa/(2^cir_exponent)) bits/s */ +#define MLX5_CONST_CIR 8000000000ULL +#define MLX5_CALC_CIR(m, e) ((MLX5_CONST_CIR * (m)) >> (e)) +#define MLX5_MAX_CIR ((MLX5_CONST_CIR * 0x100) - 1) + +/* cbs = cbs_mantissa*2^cbs_exponent */ +#define MLX5_CALC_CBS(m, e) ((m) << (e)) +#define MLX5_MAX_CBS ((0x100ULL << 0x1F) - 1) +#define MLX5_MAX_HW_CBS 0x7FFFFFFF + +struct mlx5e_flow_meter_aso_obj { + struct list_head entry; + int base_id; + int total_meters; + + unsigned long meters_map[]; /* must be at the end of this struct */ +}; + +struct mlx5e_flow_meters { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_aso *aso; + struct mutex aso_lock; /* Protects aso operations */ + int log_granularity; + u32 pdn; + + DECLARE_HASHTABLE(hashtbl, 8); + + struct mutex sync_lock; /* protect flow meter operations */ + struct list_head partial_list; + struct list_head full_list; + + struct mlx5_core_dev *mdev; + struct mlx5e_post_act *post_act; +}; + +static void +mlx5e_flow_meter_cir_calc(u64 cir, u8 *man, u8 *exp) +{ + s64 _cir, _delta, delta = S64_MAX; + u8 e, _man = 0, _exp = 0; + u64 m; + + for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */ + m = cir << e; + if ((s64)m < 0) /* overflow */ + break; + m = div64_u64(m, MLX5_CONST_CIR); + if (m > 0xFF) /* man width 8 bit */ + continue; + _cir = MLX5_CALC_CIR(m, e); + _delta = cir - _cir; + if (_delta < delta) { + _man = m; + _exp = e; + if (!_delta) + goto found; + delta = _delta; + } + } + +found: + *man = _man; + *exp = _exp; +} + +static void +mlx5e_flow_meter_cbs_calc(u64 cbs, u8 *man, u8 *exp) +{ + s64 _cbs, _delta, delta = S64_MAX; + u8 e, _man = 0, _exp = 0; + u64 m; + + for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */ + m = cbs >> e; + if (m > 0xFF) /* man width 8 bit */ + continue; + _cbs = MLX5_CALC_CBS(m, e); + _delta = cbs - _cbs; + if (_delta < delta) { + _man = m; + _exp = e; + if (!_delta) + goto found; + delta = _delta; + } + } + +found: + *man = _man; + *exp = _exp; +} + +int +mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, + struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *meter_params) +{ + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl; + struct mlx5_wqe_aso_data_seg *aso_data; + struct mlx5e_flow_meters *flow_meters; + u8 cir_man, cir_exp, cbs_man, cbs_exp; + struct mlx5_aso_wqe *aso_wqe; + unsigned long expires; + struct mlx5_aso *aso; + u64 rate, burst; + u8 ds_cnt; + int err; + + rate = meter_params->rate; + burst = meter_params->burst; + + /* HW treats each packet as 128 bytes in PPS mode */ + if (meter_params->mode == MLX5_RATE_LIMIT_PPS) { + rate <<= 10; + burst <<= 7; + } + + if (!rate || rate > MLX5_MAX_CIR || !burst || burst > MLX5_MAX_CBS) + return -EINVAL; + + /* HW has limitation of total 31 bits for cbs */ + if (burst > MLX5_MAX_HW_CBS) { + mlx5_core_warn(mdev, + "burst(%lld) is too large, use HW allowed value(%d)\n", + burst, MLX5_MAX_HW_CBS); + burst = MLX5_MAX_HW_CBS; + } + + mlx5_core_dbg(mdev, "meter mode=%d\n", meter_params->mode); + mlx5e_flow_meter_cir_calc(rate, &cir_man, &cir_exp); + mlx5_core_dbg(mdev, "rate=%lld, cir=%lld, exp=%d, man=%d\n", + rate, MLX5_CALC_CIR(cir_man, cir_exp), cir_exp, cir_man); + mlx5e_flow_meter_cbs_calc(burst, &cbs_man, &cbs_exp); + mlx5_core_dbg(mdev, "burst=%lld, cbs=%lld, exp=%d, man=%d\n", + burst, MLX5_CALC_CBS((u64)cbs_man, cbs_exp), cbs_exp, cbs_man); + + if (!cir_man || !cbs_man) + return -EINVAL; + + flow_meters = meter->flow_meters; + aso = flow_meters->aso; + + mutex_lock(&flow_meters->aso_lock); + aso_wqe = mlx5_aso_get_wqe(aso); + ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_DS); + mlx5_aso_build_wqe(aso, ds_cnt, aso_wqe, meter->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER); + + aso_ctrl = &aso_wqe->aso_ctrl; + aso_ctrl->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE << 6; + aso_ctrl->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | + MLX5_ASO_ALWAYS_TRUE << 4; + aso_ctrl->data_offset_condition_operand = MLX5_ASO_LOGICAL_OR << 6; + aso_ctrl->data_mask = cpu_to_be64(0x80FFFFFFULL << (meter->idx ? 0 : 32)); + + aso_data = (struct mlx5_wqe_aso_data_seg *)(aso_wqe + 1); + memset(aso_data, 0, sizeof(*aso_data)); + aso_data->bytewise_data[meter->idx * 8] = cpu_to_be32((0x1 << 31) | /* valid */ + (MLX5_FLOW_METER_COLOR_GREEN << MLX5_START_COLOR_SHIFT)); + if (meter_params->mode == MLX5_RATE_LIMIT_PPS) + aso_data->bytewise_data[meter->idx * 8] |= + cpu_to_be32(MLX5_FLOW_METER_MODE_NUM_PACKETS << MLX5_METER_MODE_SHIFT); + else + aso_data->bytewise_data[meter->idx * 8] |= + cpu_to_be32(MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH << MLX5_METER_MODE_SHIFT); + + aso_data->bytewise_data[meter->idx * 8 + 2] = cpu_to_be32((cbs_exp << MLX5_CBS_EXP_SHIFT) | + (cbs_man << MLX5_CBS_MAN_SHIFT) | + (cir_exp << MLX5_CIR_EXP_SHIFT) | + cir_man); + + mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl); + + /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */ + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(aso, true); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); + mutex_unlock(&flow_meters->aso_lock); + + return err; +} + +static int +mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct mlx5_core_dev *mdev = flow_meters->mdev; + void *obj, *param; + int err; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO); + param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param); + MLX5_SET(general_obj_create_param, param, log_obj_range, + flow_meters->log_granularity); + + obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj); + MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) { + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) created\n", *obj_id); + } + + return err; +} + +static void +mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) destroyed\n", obj_id); +} + +static struct mlx5e_flow_meter_handle * +__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters, bool alloc_aso) +{ + struct mlx5_core_dev *mdev = flow_meters->mdev; + struct mlx5e_flow_meter_aso_obj *meters_obj; + struct mlx5e_flow_meter_handle *meter; + struct mlx5_fc *counter; + int err, pos, total; + u32 id; + + meter = kzalloc(sizeof(*meter), GFP_KERNEL); + if (!meter) + return ERR_PTR(-ENOMEM); + + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_drop_counter; + } + meter->drop_counter = counter; + + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_act_counter; + } + meter->act_counter = counter; + + if (!alloc_aso) + goto no_aso; + + meters_obj = list_first_entry_or_null(&flow_meters->partial_list, + struct mlx5e_flow_meter_aso_obj, + entry); + /* 2 meters in one object */ + total = 1 << (flow_meters->log_granularity + 1); + if (!meters_obj) { + err = mlx5e_flow_meter_create_aso_obj(flow_meters, &id); + if (err) { + mlx5_core_err(mdev, "Failed to create flow meter ASO object\n"); + goto err_create; + } + + meters_obj = kzalloc(sizeof(*meters_obj) + BITS_TO_BYTES(total), + GFP_KERNEL); + if (!meters_obj) { + err = -ENOMEM; + goto err_mem; + } + + meters_obj->base_id = id; + meters_obj->total_meters = total; + list_add(&meters_obj->entry, &flow_meters->partial_list); + pos = 0; + } else { + pos = find_first_zero_bit(meters_obj->meters_map, total); + if (bitmap_weight(meters_obj->meters_map, total) == total - 1) { + list_del(&meters_obj->entry); + list_add(&meters_obj->entry, &flow_meters->full_list); + } + } + + bitmap_set(meters_obj->meters_map, pos, 1); + meter->meters_obj = meters_obj; + meter->obj_id = meters_obj->base_id + pos / 2; + meter->idx = pos % 2; + +no_aso: + meter->flow_meters = flow_meters; + mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n", + meter->obj_id, meter->idx); + + return meter; + +err_mem: + mlx5e_flow_meter_destroy_aso_obj(mdev, id); +err_create: + mlx5_fc_destroy(mdev, meter->act_counter); +err_act_counter: + mlx5_fc_destroy(mdev, meter->drop_counter); +err_drop_counter: + kfree(meter); + return ERR_PTR(err); +} + +static void +__mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter) +{ + struct mlx5e_flow_meters *flow_meters = meter->flow_meters; + struct mlx5_core_dev *mdev = flow_meters->mdev; + struct mlx5e_flow_meter_aso_obj *meters_obj; + int n, pos; + + mlx5_fc_destroy(mdev, meter->act_counter); + mlx5_fc_destroy(mdev, meter->drop_counter); + + if (meter->params.mtu) + goto out_no_aso; + + meters_obj = meter->meters_obj; + pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx; + bitmap_clear(meters_obj->meters_map, pos, 1); + n = bitmap_weight(meters_obj->meters_map, meters_obj->total_meters); + if (n == 0) { + list_del(&meters_obj->entry); + mlx5e_flow_meter_destroy_aso_obj(mdev, meters_obj->base_id); + kfree(meters_obj); + } else if (n == meters_obj->total_meters - 1) { + list_del(&meters_obj->entry); + list_add(&meters_obj->entry, &flow_meters->partial_list); + } + +out_no_aso: + mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n", + meter->obj_id, meter->idx); + kfree(meter); +} + +static struct mlx5e_flow_meter_handle * +__mlx5e_tc_meter_get(struct mlx5e_flow_meters *flow_meters, u32 index) +{ + struct mlx5e_flow_meter_handle *meter; + + hash_for_each_possible(flow_meters->hashtbl, meter, hlist, index) + if (meter->params.index == index) + goto add_ref; + + return ERR_PTR(-ENOENT); + +add_ref: + meter->refcnt++; + + return meter; +} + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meters *flow_meters; + struct mlx5e_flow_meter_handle *meter; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&flow_meters->sync_lock); + meter = __mlx5e_tc_meter_get(flow_meters, params->index); + mutex_unlock(&flow_meters->sync_lock); + + return meter; +} + +static void +__mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter) +{ + if (--meter->refcnt == 0) { + hash_del(&meter->hlist); + __mlx5e_flow_meter_free(meter); + } +} + +void +mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter) +{ + struct mlx5e_flow_meters *flow_meters = meter->flow_meters; + + mutex_lock(&flow_meters->sync_lock); + __mlx5e_tc_meter_put(meter); + mutex_unlock(&flow_meters->sync_lock); +} + +static struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meter_handle *meter; + + meter = __mlx5e_flow_meter_alloc(flow_meters, !params->mtu); + if (IS_ERR(meter)) + return meter; + + hash_add(flow_meters->hashtbl, &meter->hlist, params->index); + meter->params.index = params->index; + meter->params.mtu = params->mtu; + meter->refcnt++; + + return meter; +} + +static int +__mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5_core_dev *mdev = meter->flow_meters->mdev; + int err = 0; + + if (meter->params.mode != params->mode || meter->params.rate != params->rate || + meter->params.burst != params->burst) { + err = mlx5e_tc_meter_modify(mdev, meter, params); + if (err) + goto out; + + meter->params.mode = params->mode; + meter->params.rate = params->rate; + meter->params.burst = params->burst; + } + +out: + return err; +} + +int +mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params) +{ + struct mlx5_core_dev *mdev = meter->flow_meters->mdev; + struct mlx5e_flow_meters *flow_meters; + int err; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return -EOPNOTSUPP; + + mutex_lock(&flow_meters->sync_lock); + err = __mlx5e_tc_meter_update(meter, params); + mutex_unlock(&flow_meters->sync_lock); + return err; +} + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params) +{ + struct mlx5e_flow_meters *flow_meters; + struct mlx5e_flow_meter_handle *meter; + int err; + + flow_meters = mlx5e_get_flow_meters(mdev); + if (!flow_meters) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&flow_meters->sync_lock); + meter = __mlx5e_tc_meter_get(flow_meters, params->index); + if (IS_ERR(meter)) { + meter = mlx5e_tc_meter_alloc(flow_meters, params); + if (IS_ERR(meter)) { + err = PTR_ERR(meter); + goto err_get; + } + } + + err = __mlx5e_tc_meter_update(meter, params); + if (err) + goto err_update; + + mutex_unlock(&flow_meters->sync_lock); + return meter; + +err_update: + __mlx5e_tc_meter_put(meter); +err_get: + mutex_unlock(&flow_meters->sync_lock); + return ERR_PTR(err); +} + +enum mlx5_flow_namespace_type +mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters) +{ + return flow_meters->ns_type; +} + +struct mlx5e_flow_meters * +mlx5e_flow_meters_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_flow_meters *flow_meters; + int err; + + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO)) + return ERR_PTR(-EOPNOTSUPP); + + if (IS_ERR_OR_NULL(post_act)) { + netdev_dbg(priv->netdev, + "flow meter offload is not supported, post action is missing\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + flow_meters = kzalloc(sizeof(*flow_meters), GFP_KERNEL); + if (!flow_meters) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_alloc_pd(mdev, &flow_meters->pdn); + if (err) { + mlx5_core_err(mdev, "Failed to alloc pd for flow meter aso, err=%d\n", err); + goto err_out; + } + + flow_meters->aso = mlx5_aso_create(mdev, flow_meters->pdn); + if (IS_ERR(flow_meters->aso)) { + mlx5_core_warn(mdev, "Failed to create aso wqe for flow meter\n"); + err = PTR_ERR(flow_meters->aso); + goto err_sq; + } + + mutex_init(&flow_meters->sync_lock); + INIT_LIST_HEAD(&flow_meters->partial_list); + INIT_LIST_HEAD(&flow_meters->full_list); + + flow_meters->ns_type = ns_type; + flow_meters->mdev = mdev; + flow_meters->post_act = post_act; + mutex_init(&flow_meters->aso_lock); + flow_meters->log_granularity = min_t(int, 6, + MLX5_CAP_QOS(mdev, log_meter_aso_max_alloc)); + + return flow_meters; + +err_sq: + mlx5_core_dealloc_pd(mdev, flow_meters->pdn); +err_out: + kfree(flow_meters); + return ERR_PTR(err); +} + +void +mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters) +{ + if (IS_ERR_OR_NULL(flow_meters)) + return; + + mlx5_aso_destroy(flow_meters->aso); + mlx5_core_dealloc_pd(flow_meters->mdev, flow_meters->pdn); + kfree(flow_meters); +} + +void +mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, + u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse) +{ + u64 bytes1, packets1, lastuse1; + u64 bytes2, packets2, lastuse2; + + mlx5_fc_query_cached(meter->act_counter, &bytes1, &packets1, &lastuse1); + mlx5_fc_query_cached(meter->drop_counter, &bytes2, &packets2, &lastuse2); + + *bytes = bytes1 + bytes2; + *packets = packets1 + packets2; + *drops = packets2; + *lastuse = max_t(u64, lastuse1, lastuse2); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h new file mode 100644 index 0000000000..9b795cd106 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_FLOW_METER_H__ +#define __MLX5_EN_FLOW_METER_H__ + +struct mlx5e_post_meter_priv; +struct mlx5e_flow_meter_aso_obj; +struct mlx5e_flow_meters; +struct mlx5_flow_attr; + +enum mlx5e_flow_meter_mode { + MLX5_RATE_LIMIT_BPS, + MLX5_RATE_LIMIT_PPS, +}; + +struct mlx5e_flow_meter_params { + enum mlx5e_flow_meter_mode mode; + /* police action index */ + u32 index; + u64 rate; + u64 burst; + u32 mtu; +}; + +struct mlx5e_flow_meter_handle { + struct mlx5e_flow_meters *flow_meters; + struct mlx5e_flow_meter_aso_obj *meters_obj; + u32 obj_id; + u8 idx; + + int refcnt; + struct hlist_node hlist; + struct mlx5e_flow_meter_params params; + + struct mlx5_fc *act_counter; + struct mlx5_fc *drop_counter; +}; + +struct mlx5e_meter_attr { + struct mlx5e_flow_meter_params params; + struct mlx5e_flow_meter_handle *meter; + struct mlx5e_post_meter_priv *post_meter; +}; + +int +mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, + struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *meter_params); + +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params); +void +mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter); +int +mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter, + struct mlx5e_flow_meter_params *params); +struct mlx5e_flow_meter_handle * +mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params); + +enum mlx5_flow_namespace_type +mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters); + +struct mlx5e_flow_meters * +mlx5e_flow_meters_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_action); +void +mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters); + +void +mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, + u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse); + +#endif /* __MLX5_EN_FLOW_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c new file mode 100644 index 0000000000..86bf007fd0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "en/tc_priv.h" +#include "en_tc.h" +#include "post_act.h" +#include "mlx5_core.h" +#include "fs_core.h" + +struct mlx5e_post_act { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_fs_chains *chains; + struct mlx5_flow_table *ft; + struct mlx5e_priv *priv; + struct xarray ids; +}; + +struct mlx5e_post_act_handle { + enum mlx5_flow_namespace_type ns_type; + struct mlx5_flow_attr *attr; + struct mlx5_flow_handle *rule; + u32 id; +}; + +#define MLX5_POST_ACTION_BITS MLX5_REG_MAPPING_MBITS(FTEID_TO_REG) +#define MLX5_POST_ACTION_MASK MLX5_REG_MAPPING_MASK(FTEID_TO_REG) +#define MLX5_POST_ACTION_MAX MLX5_POST_ACTION_MASK + +struct mlx5e_post_act * +mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + enum mlx5_flow_namespace_type ns_type) +{ + enum fs_flow_table_type table_type = ns_type == MLX5_FLOW_NAMESPACE_FDB ? + FS_FT_FDB : FS_FT_NIC_RX; + struct mlx5e_post_act *post_act; + int err; + + if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) { + if (priv->mdev->coredev_type == MLX5_COREDEV_PF) + mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + err = -EOPNOTSUPP; + goto err_check; + } + + post_act = kzalloc(sizeof(*post_act), GFP_KERNEL); + if (!post_act) { + err = -ENOMEM; + goto err_check; + } + post_act->ft = mlx5_chains_create_global_table(chains); + if (IS_ERR(post_act->ft)) { + err = PTR_ERR(post_act->ft); + mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err); + goto err_ft; + } + post_act->chains = chains; + post_act->ns_type = ns_type; + post_act->priv = priv; + xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1); + return post_act; + +err_ft: + kfree(post_act); +err_check: + return ERR_PTR(err); +} + +void +mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act) +{ + if (IS_ERR_OR_NULL(post_act)) + return; + + xa_destroy(&post_act->ids); + mlx5_chains_destroy_global_table(post_act->chains, post_act->ft); + kfree(post_act); +} + +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + struct mlx5_flow_spec *spec; + int err; + + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Post action rule matches on fte_id and executes original rule's tc rule action */ + mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK); + + handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr); + if (IS_ERR(handle->rule)) { + err = PTR_ERR(handle->rule); + netdev_warn(post_act->priv->netdev, "Failed to add post action rule"); + goto err_rule; + } + + kvfree(spec); + return 0; + +err_rule: + kvfree(spec); + return err; +} + +struct mlx5e_post_act_handle * +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr) +{ + struct mlx5e_post_act_handle *handle; + int err; + + if (IS_ERR(post_act)) + return ERR_CAST(post_act); + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return ERR_PTR(-ENOMEM); + + post_attr->chain = 0; + post_attr->prio = 0; + post_attr->ft = post_act->ft; + post_attr->inner_match_level = MLX5_MATCH_NONE; + post_attr->outer_match_level = MLX5_MATCH_NONE; + post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP; + post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + + handle->ns_type = post_act->ns_type; + /* Splits were handled before post action */ + if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB) + post_attr->esw_attr->split_count = 0; + + err = xa_alloc(&post_act->ids, &handle->id, post_attr, + XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL); + if (err) + goto err_xarray; + + handle->attr = post_attr; + + return handle; + +err_xarray: + kfree(handle); + return ERR_PTR(err); +} + +void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle) +{ + mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr); + handle->rule = NULL; +} + +void +mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle) +{ + if (!IS_ERR_OR_NULL(handle->rule)) + mlx5e_tc_post_act_unoffload(post_act, handle); + xa_erase(&post_act->ids, handle->id); + kfree(handle); +} + +struct mlx5_flow_table * +mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act) +{ + return post_act->ft; +} + +/* Allocate a header modify action to write the post action handle fte id to a register. */ +int +mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev, + struct mlx5e_post_act_handle *handle, + struct mlx5e_tc_mod_hdr_acts *acts) +{ + return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h new file mode 100644 index 0000000000..40b8df184a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_POST_ACTION_H__ +#define __MLX5_POST_ACTION_H__ + +#include "en.h" +#include "lib/fs_chains.h" + +struct mlx5_flow_attr; +struct mlx5e_priv; +struct mlx5e_tc_mod_hdr_acts; + +struct mlx5e_post_act * +mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + enum mlx5_flow_namespace_type ns_type); + +void +mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act); + +struct mlx5e_post_act_handle * +mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *post_attr); + +void +mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle); + +int +mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + +void +mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act, + struct mlx5e_post_act_handle *handle); + +struct mlx5_flow_table * +mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act); + +int +mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev, + struct mlx5e_post_act_handle *handle, + struct mlx5e_tc_mod_hdr_acts *acts); + +#endif /* __MLX5_POST_ACTION_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c new file mode 100644 index 0000000000..50b60fd009 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "en/tc_priv.h" +#include "post_meter.h" +#include "en/tc/post_act.h" + +#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG) +#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG) + +struct mlx5e_post_meter_rate_table { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_flow_handle *green_rule; + struct mlx5_flow_attr *green_attr; + struct mlx5_flow_handle *red_rule; + struct mlx5_flow_attr *red_attr; +}; + +struct mlx5e_post_meter_mtu_table { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; +}; + +struct mlx5e_post_meter_mtu_tables { + struct mlx5e_post_meter_mtu_table green_table; + struct mlx5e_post_meter_mtu_table red_table; +}; + +struct mlx5e_post_meter_priv { + enum mlx5e_post_meter_type type; + union { + struct mlx5e_post_meter_rate_table rate_steering_table; + struct mlx5e_post_meter_mtu_tables mtu_tables; + }; +}; + +struct mlx5_flow_table * +mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return post_meter->rate_steering_table.ft; +} + +struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return post_meter->mtu_tables.green_table.ft; +} + +struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return post_meter->mtu_tables.red_table.ft; +} + +static struct mlx5_flow_table * +mlx5e_post_meter_table_create(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + + root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type); + if (!root_ns) { + mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 2; + ft_attr.level = 1; + + return mlx5_create_flow_table(root_ns, &ft_attr); +} + +static int +mlx5e_post_meter_rate_fg_create(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5e_post_meter_rate_table *table = &post_meter->rate_steering_table; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *misc2, *match_criteria; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc2 = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_5, MLX5_PACKET_COLOR_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + table->fg = mlx5_create_flow_group(table->ft, flow_group_in); + if (IS_ERR(table->fg)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n"); + err = PTR_ERR(table->fg); + } + + kvfree(flow_group_in); + return err; +} + +static struct mlx5_flow_handle * +mlx5e_post_meter_add_rule(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_flow_handle *ret; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_DROP) + attr->counter = drop_counter; + else + attr->counter = act_counter; + + attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + attr->inner_match_level = MLX5_MATCH_NONE; + attr->outer_match_level = MLX5_MATCH_NONE; + attr->chain = 0; + attr->prio = 0; + + ret = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + /* We did not create the counter, so we can't delete it. + * Avoid freeing the counter when the attr is deleted in free_branching_attr + */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + + return ret; +} + +static int +mlx5e_post_meter_rate_rules_create(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5e_post_act *post_act, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5_flow_attr *green_attr, + struct mlx5_flow_attr *red_attr) +{ + struct mlx5e_post_meter_rate_table *table = &post_meter->rate_steering_table; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG, + MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK); + red_attr->ft = post_meter->rate_steering_table.ft; + rule = mlx5e_post_meter_add_rule(priv, post_meter, spec, red_attr, + act_counter, drop_counter); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter exceed rule\n"); + err = PTR_ERR(rule); + goto err_red; + } + table->red_rule = rule; + table->red_attr = red_attr; + + mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG, + MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK); + green_attr->ft = post_meter->rate_steering_table.ft; + rule = mlx5e_post_meter_add_rule(priv, post_meter, spec, green_attr, + act_counter, drop_counter); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter notexceed rule\n"); + err = PTR_ERR(rule); + goto err_green; + } + table->green_rule = rule; + table->green_attr = green_attr; + + kvfree(spec); + return 0; + +err_green: + mlx5_del_flow_rules(table->red_rule); +err_red: + kvfree(spec); + return err; +} + +static void +mlx5e_post_meter_rate_rules_destroy(struct mlx5_eswitch *esw, + struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5e_post_meter_rate_table *rate_table = &post_meter->rate_steering_table; + + mlx5_eswitch_del_offloaded_rule(esw, rate_table->red_rule, rate_table->red_attr); + mlx5_eswitch_del_offloaded_rule(esw, rate_table->green_rule, rate_table->green_attr); +} + +static void +mlx5e_post_meter_rate_fg_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_destroy_flow_group(post_meter->rate_steering_table.fg); +} + +static void +mlx5e_post_meter_rate_table_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_destroy_flow_table(post_meter->rate_steering_table.ft); +} + +static void +mlx5e_post_meter_mtu_rules_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables; + + mlx5_del_flow_rules(mtu_tables->green_table.rule); + mlx5_del_flow_rules(mtu_tables->red_table.rule); +} + +static void +mlx5e_post_meter_mtu_fg_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables; + + mlx5_destroy_flow_group(mtu_tables->green_table.fg); + mlx5_destroy_flow_group(mtu_tables->red_table.fg); +} + +static void +mlx5e_post_meter_mtu_table_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables; + + mlx5_destroy_flow_table(mtu_tables->green_table.ft); + mlx5_destroy_flow_table(mtu_tables->red_table.ft); +} + +static int +mlx5e_post_meter_rate_create(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5_flow_attr *green_attr, + struct mlx5_flow_attr *red_attr) +{ + struct mlx5_flow_table *ft; + int err; + + post_meter->type = MLX5E_POST_METER_RATE; + + ft = mlx5e_post_meter_table_create(priv, ns_type); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n"); + goto err_ft; + } + + post_meter->rate_steering_table.ft = ft; + + err = mlx5e_post_meter_rate_fg_create(priv, post_meter); + if (err) + goto err_fg; + + err = mlx5e_post_meter_rate_rules_create(priv, post_meter, post_act, + act_counter, drop_counter, + green_attr, red_attr); + if (err) + goto err_rules; + + return 0; + +err_rules: + mlx5e_post_meter_rate_fg_destroy(post_meter); +err_fg: + mlx5e_post_meter_rate_table_destroy(post_meter); +err_ft: + return err; +} + +static int +mlx5e_post_meter_create_mtu_table(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_meter_mtu_table *table) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *flow_group_in; + int err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + table->ft = mlx5e_post_meter_table_create(priv, ns_type); + if (IS_ERR(table->ft)) { + err = PTR_ERR(table->ft); + goto err_ft; + } + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + fg = mlx5_create_flow_group(table->ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + goto err_miss_grp; + } + table->fg = fg; + + kvfree(flow_group_in); + return 0; + +err_miss_grp: + mlx5_destroy_flow_table(table->ft); +err_ft: + kvfree(flow_group_in); + return err; +} + +static int +mlx5e_post_meter_mtu_create(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5_flow_attr *green_attr, + struct mlx5_flow_attr *red_attr) +{ + struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables; + static struct mlx5_flow_spec zero_spec = {}; + struct mlx5_flow_handle *rule; + int err; + + post_meter->type = MLX5E_POST_METER_MTU; + + err = mlx5e_post_meter_create_mtu_table(priv, ns_type, &mtu_tables->green_table); + if (err) + goto err_green_ft; + + green_attr->ft = mtu_tables->green_table.ft; + rule = mlx5e_post_meter_add_rule(priv, post_meter, &zero_spec, green_attr, + act_counter, drop_counter); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter conform rule\n"); + err = PTR_ERR(rule); + goto err_green_rule; + } + mtu_tables->green_table.rule = rule; + mtu_tables->green_table.attr = green_attr; + + err = mlx5e_post_meter_create_mtu_table(priv, ns_type, &mtu_tables->red_table); + if (err) + goto err_red_ft; + + red_attr->ft = mtu_tables->red_table.ft; + rule = mlx5e_post_meter_add_rule(priv, post_meter, &zero_spec, red_attr, + act_counter, drop_counter); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter exceed rule\n"); + err = PTR_ERR(rule); + goto err_red_rule; + } + mtu_tables->red_table.rule = rule; + mtu_tables->red_table.attr = red_attr; + + return 0; + +err_red_rule: + mlx5_destroy_flow_table(mtu_tables->red_table.ft); +err_red_ft: + mlx5_del_flow_rules(mtu_tables->green_table.rule); +err_green_rule: + mlx5_destroy_flow_table(mtu_tables->green_table.ft); +err_green_ft: + return err; +} + +struct mlx5e_post_meter_priv * +mlx5e_post_meter_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + enum mlx5e_post_meter_type type, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5_flow_attr *branch_true, + struct mlx5_flow_attr *branch_false) +{ + struct mlx5e_post_meter_priv *post_meter; + int err; + + post_meter = kzalloc(sizeof(*post_meter), GFP_KERNEL); + if (!post_meter) + return ERR_PTR(-ENOMEM); + + switch (type) { + case MLX5E_POST_METER_MTU: + err = mlx5e_post_meter_mtu_create(priv, ns_type, post_act, + act_counter, drop_counter, post_meter, + branch_true, branch_false); + break; + case MLX5E_POST_METER_RATE: + err = mlx5e_post_meter_rate_create(priv, ns_type, post_act, + act_counter, drop_counter, post_meter, + branch_true, branch_false); + break; + default: + err = -EOPNOTSUPP; + } + + if (err) + goto err; + + return post_meter; + +err: + kfree(post_meter); + return ERR_PTR(err); +} + +static void +mlx5e_post_meter_rate_destroy(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter) +{ + mlx5e_post_meter_rate_rules_destroy(esw, post_meter); + mlx5e_post_meter_rate_fg_destroy(post_meter); + mlx5e_post_meter_rate_table_destroy(post_meter); +} + +static void +mlx5e_post_meter_mtu_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5e_post_meter_mtu_rules_destroy(post_meter); + mlx5e_post_meter_mtu_fg_destroy(post_meter); + mlx5e_post_meter_mtu_table_destroy(post_meter); +} + +void +mlx5e_post_meter_cleanup(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter) +{ + if (post_meter->type == MLX5E_POST_METER_RATE) + mlx5e_post_meter_rate_destroy(esw, post_meter); + else + mlx5e_post_meter_mtu_destroy(post_meter); + + kfree(post_meter); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h new file mode 100644 index 0000000000..e013b77186 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_POST_METER_H__ +#define __MLX5_EN_POST_METER_H__ + +#define packet_color_to_reg { \ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5, \ + .moffset = 0, \ + .mlen = 8, \ + .soffset = MLX5_BYTE_OFF(fte_match_param, \ + misc_parameters_2.metadata_reg_c_5), \ +} + +struct mlx5e_post_meter_priv; + +enum mlx5e_post_meter_type { + MLX5E_POST_METER_RATE = 0, + MLX5E_POST_METER_MTU +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +struct mlx5_flow_table * +mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter); + +struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter); + +struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter); + +struct mlx5e_post_meter_priv * +mlx5e_post_meter_init(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + enum mlx5e_post_meter_type type, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5_flow_attr *branch_true, + struct mlx5_flow_attr *branch_false); + +void +mlx5e_post_meter_cleanup(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return NULL; +} + +static inline struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return NULL; +} + +#endif + +#endif /* __MLX5_EN_POST_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c new file mode 100644 index 0000000000..5db239cae8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include +#include +#include "en/mapping.h" +#include "en/tc/post_act.h" +#include "en/tc/act/sample.h" +#include "en/mod_hdr.h" +#include "sample.h" +#include "eswitch.h" +#include "en_tc.h" +#include "fs_core.h" + +#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024) + +static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = { + .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE, + .max_num_groups = 0, /* default num of groups */ + .flags = 0, +}; + +struct mlx5e_tc_psample { + struct mlx5_eswitch *esw; + struct mlx5_flow_table *termtbl; + struct mlx5_flow_handle *termtbl_rule; + DECLARE_HASHTABLE(hashtbl, 8); + struct mutex ht_lock; /* protect hashtbl */ + DECLARE_HASHTABLE(restore_hashtbl, 8); + struct mutex restore_lock; /* protect restore_hashtbl */ + struct mlx5e_post_act *post_act; +}; + +struct mlx5e_sampler { + struct hlist_node hlist; + u32 sampler_id; + u32 sample_ratio; + u32 sample_table_id; + u32 default_table_id; + int count; +}; + +struct mlx5e_sample_flow { + struct mlx5e_sampler *sampler; + struct mlx5e_sample_restore *restore; + struct mlx5_flow_attr *pre_attr; + struct mlx5_flow_handle *pre_rule; + struct mlx5_flow_attr *post_attr; + struct mlx5_flow_handle *post_rule; +}; + +struct mlx5e_sample_restore { + struct hlist_node hlist; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_flow_handle *rule; + u32 obj_id; + int count; +}; + +static int +sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample) +{ + struct mlx5_eswitch *esw = tc_psample->esw; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_act act = {}; + int err; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table)) { + mlx5_core_warn(dev, "termination table is not supported\n"); + return -EOPNOTSUPP; + } + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + mlx5_core_warn(dev, "failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 1; + ft_attr.level = 1; + tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(tc_psample->termtbl)) { + err = PTR_ERR(tc_psample->termtbl); + mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err); + return err; + } + + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.vport.num = esw->manager_vport; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1); + if (IS_ERR(tc_psample->termtbl_rule)) { + err = PTR_ERR(tc_psample->termtbl_rule); + mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err); + mlx5_destroy_flow_table(tc_psample->termtbl); + return err; + } + + return 0; +} + +static void +sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample) +{ + mlx5_del_flow_rules(tc_psample->termtbl_rule); + mlx5_destroy_flow_table(tc_psample->termtbl); +} + +static int +sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler) +{ + u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u64 general_obj_types; + void *obj; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER)) + return -EOPNOTSUPP; + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + return -EOPNOTSUPP; + + obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object); + MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB); + MLX5_SET(sampler_obj, obj, ignore_flow_level, 1); + MLX5_SET(sampler_obj, obj, level, 1); + MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio); + MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id); + MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id); + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void +sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static u32 +sampler_hash(u32 sample_ratio, u32 default_table_id) +{ + return jhash_2words(sample_ratio, default_table_id, 0); +} + +static int +sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2) +{ + return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2; +} + +static struct mlx5e_sampler * +sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id) +{ + struct mlx5e_sampler *sampler; + u32 hash_key; + int err; + + mutex_lock(&tc_psample->ht_lock); + hash_key = sampler_hash(sample_ratio, default_table_id); + hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key) + if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id, + sample_ratio, default_table_id)) + goto add_ref; + + sampler = kzalloc(sizeof(*sampler), GFP_KERNEL); + if (!sampler) { + err = -ENOMEM; + goto err_alloc; + } + + sampler->sample_table_id = tc_psample->termtbl->id; + sampler->default_table_id = default_table_id; + sampler->sample_ratio = sample_ratio; + + err = sampler_obj_create(tc_psample->esw->dev, sampler); + if (err) + goto err_create; + + hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key); + +add_ref: + sampler->count++; + mutex_unlock(&tc_psample->ht_lock); + return sampler; + +err_create: + kfree(sampler); +err_alloc: + mutex_unlock(&tc_psample->ht_lock); + return ERR_PTR(err); +} + +static void +sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler) +{ + mutex_lock(&tc_psample->ht_lock); + if (--sampler->count == 0) { + hash_del(&sampler->hlist); + sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id); + kfree(sampler); + } + mutex_unlock(&tc_psample->ht_lock); +} + +/* obj_id is used to restore the sample parameters. + * Set fte_id in original flow table, then match it in the default table. + * Only set it for NICs can preserve reg_c or decap action. For other cases, + * use the same match in the default table. + * Use one header rewrite for both obj_id and fte_id. + */ +static struct mlx5_modify_hdr * +sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct mlx5_modify_hdr *modify_hdr; + int err; + + err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB, + MAPPED_OBJ_TO_REG, obj_id); + if (err) + goto err_set_regc0; + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts->num_actions, + mod_acts->actions); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + goto err_modify_hdr; + } + + mlx5e_mod_hdr_dealloc(mod_acts); + return modify_hdr; + +err_modify_hdr: + mlx5e_mod_hdr_dealloc(mod_acts); +err_set_regc0: + return ERR_PTR(err); +} + +static struct mlx5e_sample_restore * +sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct mlx5_eswitch *esw = tc_psample->esw; + struct mlx5_core_dev *mdev = esw->dev; + struct mlx5e_sample_restore *restore; + struct mlx5_modify_hdr *modify_hdr; + int err; + + mutex_lock(&tc_psample->restore_lock); + hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id) + if (restore->obj_id == obj_id) + goto add_ref; + + restore = kzalloc(sizeof(*restore), GFP_KERNEL); + if (!restore) { + err = -ENOMEM; + goto err_alloc; + } + restore->obj_id = obj_id; + + modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + goto err_modify_hdr; + } + restore->modify_hdr = modify_hdr; + + restore->rule = esw_add_restore_rule(esw, obj_id); + if (IS_ERR(restore->rule)) { + err = PTR_ERR(restore->rule); + goto err_restore; + } + + hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id); +add_ref: + restore->count++; + mutex_unlock(&tc_psample->restore_lock); + return restore; + +err_restore: + mlx5_modify_header_dealloc(mdev, restore->modify_hdr); +err_modify_hdr: + kfree(restore); +err_alloc: + mutex_unlock(&tc_psample->restore_lock); + return ERR_PTR(err); +} + +static void +sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore) +{ + mutex_lock(&tc_psample->restore_lock); + if (--restore->count == 0) + hash_del(&restore->hlist); + mutex_unlock(&tc_psample->restore_lock); + + if (!restore->count) { + mlx5_del_flow_rules(restore->rule); + mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr); + kfree(restore); + } +} + +void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) +{ + u32 trunc_size = mapped_obj->sample.trunc_size; + struct psample_group psample_group = {}; + struct psample_metadata md = {}; + + md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len; + md.in_ifindex = skb->dev->ifindex; + psample_group.group_num = mapped_obj->sample.group_id; + psample_group.net = &init_net; + skb_push(skb, skb->mac_len); + + psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md); +} + +static int +add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, + u32 *default_tbl_id) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB); + struct mlx5_vport_tbl_attr per_vport_tbl_attr; + struct mlx5_flow_table *default_tbl; + struct mlx5_flow_attr *post_attr; + int err; + + /* Allocate default table per vport, chain and prio. Otherwise, there is + * only one default table for the same sampler object. Rules with different + * prio and chain may overlap. For CT sample action, per vport default + * table is needed to resotre the metadata. + */ + per_vport_tbl_attr.chain = attr->chain; + per_vport_tbl_attr.prio = attr->prio; + per_vport_tbl_attr.vport = esw_attr->in_rep->vport; + per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr); + if (IS_ERR(default_tbl)) { + err = PTR_ERR(default_tbl); + goto err_default_tbl; + } + *default_tbl_id = default_tbl->id; + + post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!post_attr) { + err = -ENOMEM; + goto err_attr; + } + sample_flow->post_attr = post_attr; + memcpy(post_attr, attr, attr_sz); + /* Perform the original matches on the default table. + * Offload all actions except the sample action. + */ + post_attr->chain = 0; + post_attr->prio = 0; + post_attr->ft = default_tbl; + post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT; + + /* When offloading sample and encap action, if there is no valid + * neigh data struct, a slow path rule is offloaded first. Source + * port metadata match is set at that time. A per vport table is + * already allocated. No need to match it again. So clear the source + * port metadata match. + */ + mlx5_eswitch_clear_rule_source_port(esw, spec); + sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr); + if (IS_ERR(sample_flow->post_rule)) { + err = PTR_ERR(sample_flow->post_rule); + goto err_rule; + } + return 0; + +err_rule: + kfree(post_attr); +err_attr: + mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); +err_default_tbl: + return err; +} + +static void +del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_vport_tbl_attr tbl_attr; + + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr); + kfree(sample_flow->post_attr); + tbl_attr.chain = attr->chain; + tbl_attr.prio = attr->prio; + tbl_attr.vport = esw_attr->in_rep->vport; + tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + mlx5_esw_vporttbl_put(esw, &tbl_attr); +} + +/* For the following typical flow table: + * + * +-------------------------------+ + * + original flow table + + * +-------------------------------+ + * + original match + + * +-------------------------------+ + * + sample action + other actions + + * +-------------------------------+ + * + * We translate the tc filter with sample action to the following HW model: + * + * +---------------------+ + * + original flow table + + * +---------------------+ + * + original match + + * +---------------------+ + * | set fte_id (if reg_c preserve cap) + * | do decap (if required) + * v + * +------------------------------------------------+ + * + Flow Sampler Object + + * +------------------------------------------------+ + * + sample ratio + + * +------------------------------------------------+ + * + sample table id | default table id + + * +------------------------------------------------+ + * | | + * v v + * +-----------------------------+ +-------------------+ + * + sample table + + default table + + * +-----------------------------+ +-------------------+ + * + forward to management vport + | + * +-----------------------------+ | + * +-------+------+ + * | |reg_c preserve cap + * | |or decap action + * v v + * +-----------------+ +-------------+ + * + per vport table + + post action + + * +-----------------+ +-------------+ + * + original match + + * +-----------------+ + * + other actions + + * +-----------------+ + */ +struct mlx5_flow_handle * +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_esw_flow_attr *pre_esw_attr; + struct mlx5_mapped_obj restore_obj = {}; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + struct mlx5e_sample_flow *sample_flow; + struct mlx5e_sample_attr *sample_attr; + struct mlx5_flow_attr *pre_attr; + struct mlx5_eswitch *esw; + u32 default_tbl_id; + u32 obj_id; + int err; + + if (IS_ERR_OR_NULL(tc_psample)) + return ERR_PTR(-EOPNOTSUPP); + + sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); + if (!sample_flow) + return ERR_PTR(-ENOMEM); + sample_attr = &attr->sample_attr; + sample_attr->sample_flow = sample_flow; + + /* For NICs with reg_c_preserve support or decap action, use + * post action instead of the per vport, chain and prio table. + * Only match the fte id instead of the same match in the + * original flow table. + */ + esw = tc_psample->esw; + if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) { + struct mlx5_flow_table *ft; + + ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act); + default_tbl_id = ft->id; + } else { + err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id); + if (err) + goto err_post_rule; + } + + /* Create sampler object. */ + sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id); + if (IS_ERR(sample_flow->sampler)) { + err = PTR_ERR(sample_flow->sampler); + goto err_sampler; + } + sample_attr->sampler_id = sample_flow->sampler->sampler_id; + + /* Create an id mapping reg_c0 value to sample object. */ + restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; + restore_obj.sample.group_id = sample_attr->group_num; + restore_obj.sample.rate = sample_attr->rate; + restore_obj.sample.trunc_size = sample_attr->trunc_size; + restore_obj.sample.tunnel_id = attr->tunnel_id; + err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id); + if (err) + goto err_obj_id; + sample_attr->restore_obj_id = obj_id; + + /* Create sample restore context. */ + mod_acts = &attr->parse_attr->mod_hdr_acts; + sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts); + if (IS_ERR(sample_flow->restore)) { + err = PTR_ERR(sample_flow->restore); + goto err_sample_restore; + } + + /* Perform the original matches on the original table. Offload the + * sample action. The destination is the sampler object. + */ + pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!pre_attr) { + err = -ENOMEM; + goto err_alloc_pre_flow_attr; + } + pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + /* For decap action, do decap in the original flow table instead of the + * default flow table. + */ + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + pre_attr->modify_hdr = sample_flow->restore->modify_hdr; + pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE; + pre_attr->inner_match_level = attr->inner_match_level; + pre_attr->outer_match_level = attr->outer_match_level; + pre_attr->chain = attr->chain; + pre_attr->prio = attr->prio; + pre_attr->ft = attr->ft; + pre_attr->sample_attr = *sample_attr; + pre_esw_attr = pre_attr->esw_attr; + pre_esw_attr->in_mdev = esw_attr->in_mdev; + pre_esw_attr->in_rep = esw_attr->in_rep; + sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr); + if (IS_ERR(sample_flow->pre_rule)) { + err = PTR_ERR(sample_flow->pre_rule); + goto err_pre_offload_rule; + } + sample_flow->pre_attr = pre_attr; + + return sample_flow->pre_rule; + +err_pre_offload_rule: + kfree(pre_attr); +err_alloc_pre_flow_attr: + sample_restore_put(tc_psample, sample_flow->restore); +err_sample_restore: + mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id); +err_obj_id: + sampler_put(tc_psample, sample_flow->sampler); +err_sampler: + if (sample_flow->post_rule) + del_post_rule(esw, sample_flow, attr); +err_post_rule: + kfree(sample_flow); + return ERR_PTR(err); +} + +void +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_sample_flow *sample_flow; + struct mlx5_eswitch *esw; + + if (IS_ERR_OR_NULL(tc_psample)) + return; + + /* The following delete order can't be changed, otherwise, + * will hit fw syndromes. + */ + esw = tc_psample->esw; + sample_flow = attr->sample_attr.sample_flow; + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr); + + sample_restore_put(tc_psample, sample_flow->restore); + mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id); + sampler_put(tc_psample, sample_flow->sampler); + if (sample_flow->post_rule) + del_post_rule(esw, sample_flow, attr); + + kfree(sample_flow->pre_attr); + kfree(sample_flow); +} + +struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act) +{ + struct mlx5e_tc_psample *tc_psample; + int err; + + tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL); + if (!tc_psample) + return ERR_PTR(-ENOMEM); + if (IS_ERR_OR_NULL(post_act)) { + err = PTR_ERR(post_act); + goto err_post_act; + } + tc_psample->post_act = post_act; + tc_psample->esw = esw; + err = sampler_termtbl_create(tc_psample); + if (err) + goto err_post_act; + + mutex_init(&tc_psample->ht_lock); + mutex_init(&tc_psample->restore_lock); + + return tc_psample; + +err_post_act: + kfree(tc_psample); + return ERR_PTR(err); +} + +void +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) +{ + if (IS_ERR_OR_NULL(tc_psample)) + return; + + mutex_destroy(&tc_psample->restore_lock); + mutex_destroy(&tc_psample->ht_lock); + sampler_termtbl_destroy(tc_psample); + kfree(tc_psample); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h new file mode 100644 index 0000000000..a569367eae --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_SAMPLE_H__ +#define __MLX5_EN_TC_SAMPLE_H__ + +#include "eswitch.h" + +struct mlx5_flow_attr; +struct mlx5e_tc_psample; +struct mlx5e_post_act; + +struct mlx5e_sample_attr { + u32 group_num; + u32 rate; + u32 trunc_size; + u32 restore_obj_id; + u32 sampler_id; + struct mlx5e_sample_flow *sample_flow; +}; + +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + +void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); + +struct mlx5_flow_handle * +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act); + +void +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample); + +#else /* CONFIG_MLX5_TC_SAMPLE */ + +static inline struct mlx5_flow_handle * +mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ return ERR_PTR(-EOPNOTSUPP); } + +static inline void +mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) {} + +static inline struct mlx5e_tc_psample * +mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act) +{ return ERR_PTR(-EOPNOTSUPP); } + +static inline void +mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {} + +static inline void +mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {} + +#endif /* CONFIG_MLX5_TC_SAMPLE */ +#endif /* __MLX5_EN_TC_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c new file mode 100644 index 0000000000..fadfa8b50b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -0,0 +1,2327 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lib/fs_chains.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" +#include "en/tc_priv.h" +#include "en/mod_hdr.h" +#include "en/mapping.h" +#include "en/tc/post_act.h" +#include "en.h" +#include "en_tc.h" +#include "en_rep.h" +#include "fs_core.h" + +#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) +#define MLX5_CT_STATE_TRK_BIT BIT(2) +#define MLX5_CT_STATE_NAT_BIT BIT(3) +#define MLX5_CT_STATE_REPLY_BIT BIT(4) +#define MLX5_CT_STATE_RELATED_BIT BIT(5) +#define MLX5_CT_STATE_INVALID_BIT BIT(6) +#define MLX5_CT_STATE_NEW_BIT BIT(7) + +#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG) +#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG) + +/* Statically allocate modify actions for + * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10. + * This will be increased dynamically if needed (for the ipv6 snat + dnat). + */ +#define MLX5_CT_MIN_MOD_ACTS 10 + +#define ct_dbg(fmt, args...)\ + netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) + +struct mlx5_tc_ct_debugfs { + struct { + atomic_t offloaded; + atomic_t rx_dropped; + } stats; + + struct dentry *root; +}; + +struct mlx5_tc_ct_priv { + struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; + const struct net_device *netdev; + struct mod_hdr_tbl *mod_hdr_tbl; + struct xarray tuple_ids; + struct rhashtable zone_ht; + struct rhashtable ct_tuples_ht; + struct rhashtable ct_tuples_nat_ht; + struct mlx5_flow_table *ct; + struct mlx5_flow_table *ct_nat; + struct mlx5e_post_act *post_act; + struct mutex control_lock; /* guards parallel adds/dels */ + struct mapping_ctx *zone_mapping; + struct mapping_ctx *labels_mapping; + enum mlx5_flow_namespace_type ns_type; + struct mlx5_fs_chains *chains; + struct mlx5_ct_fs *fs; + struct mlx5_ct_fs_ops *fs_ops; + spinlock_t ht_lock; /* protects ft entries */ + struct workqueue_struct *wq; + + struct mlx5_tc_ct_debugfs debugfs; +}; + +struct mlx5_ct_zone_rule { + struct mlx5_ct_fs_rule *rule; + struct mlx5e_mod_hdr_handle *mh; + struct mlx5_flow_attr *attr; + bool nat; +}; + +struct mlx5_tc_ct_pre { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *flow_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_handle *miss_rule; + struct mlx5_modify_hdr *modify_hdr; +}; + +struct mlx5_ct_ft { + struct rhash_head node; + u16 zone; + u32 zone_restore_id; + refcount_t refcount; + struct nf_flowtable *nf_ft; + struct mlx5_tc_ct_priv *ct_priv; + struct rhashtable ct_entries_ht; + struct mlx5_tc_ct_pre pre_ct; + struct mlx5_tc_ct_pre pre_ct_nat; +}; + +struct mlx5_ct_tuple { + u16 addr_type; + __be16 n_proto; + u8 ip_proto; + struct { + union { + __be32 src_v4; + struct in6_addr src_v6; + }; + union { + __be32 dst_v4; + struct in6_addr dst_v6; + }; + } ip; + struct { + __be16 src; + __be16 dst; + } port; + + u16 zone; +}; + +struct mlx5_ct_counter { + struct mlx5_fc *counter; + refcount_t refcount; + bool is_shared; +}; + +enum { + MLX5_CT_ENTRY_FLAG_VALID, +}; + +struct mlx5_ct_entry { + struct rhash_head node; + struct rhash_head tuple_node; + struct rhash_head tuple_nat_node; + struct mlx5_ct_counter *counter; + unsigned long cookie; + unsigned long restore_cookie; + struct mlx5_ct_tuple tuple; + struct mlx5_ct_tuple tuple_nat; + struct mlx5_ct_zone_rule zone_rules[2]; + + struct mlx5_tc_ct_priv *ct_priv; + struct work_struct work; + + refcount_t refcnt; + unsigned long flags; +}; + +static void +mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_attr *attr, + struct mlx5e_mod_hdr_handle *mh); + +static const struct rhashtable_params cts_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, node), + .key_offset = offsetof(struct mlx5_ct_entry, cookie), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static const struct rhashtable_params zone_params = { + .head_offset = offsetof(struct mlx5_ct_ft, node), + .key_offset = offsetof(struct mlx5_ct_ft, zone), + .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params tuples_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, tuple_node), + .key_offset = offsetof(struct mlx5_ct_entry, tuple), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static const struct rhashtable_params tuples_nat_ht_params = { + .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node), + .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat), + .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat), + .automatic_shrinking = true, + .min_size = 16 * 1024, +}; + +static bool +mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) +{ + return !!(entry->tuple_nat_node.next); +} + +static int +mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, + u32 *labels, u32 *id) +{ + if (!memchr_inv(labels, 0, sizeof(u32) * 4)) { + *id = 0; + return 0; + } + + if (mapping_add(ct_priv->labels_mapping, labels, id)) + return -EOPNOTSUPP; + + return 0; +} + +static void +mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) +{ + if (id) + mapping_remove(ct_priv->labels_mapping, id); +} + +static int +mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) +{ + struct flow_match_control control; + struct flow_match_basic basic; + + flow_rule_match_basic(rule, &basic); + flow_rule_match_control(rule, &control); + + tuple->n_proto = basic.key->n_proto; + tuple->ip_proto = basic.key->ip_proto; + tuple->addr_type = control.key->addr_type; + + if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + tuple->ip.src_v4 = match.key->src; + tuple->ip.dst_v4 = match.key->dst; + } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + tuple->ip.src_v6 = match.key->src; + tuple->ip.dst_v6 = match.key->dst; + } else { + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (tuple->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + tuple->port.src = match.key->src; + tuple->port.dst = match.key->dst; + break; + default: + return -EOPNOTSUPP; + } + } else { + if (tuple->ip_proto != IPPROTO_GRE) + return -EOPNOTSUPP; + } + + return 0; +} + +static int +mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, + struct flow_rule *rule) +{ + struct flow_action *flow_action = &rule->action; + struct flow_action_entry *act; + u32 offset, val, ip6_offset; + int i; + + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE) + continue; + + offset = act->mangle.offset; + val = act->mangle.val; + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + if (offset == offsetof(struct iphdr, saddr)) + tuple->ip.src_v4 = cpu_to_be32(val); + else if (offset == offsetof(struct iphdr, daddr)) + tuple->ip.dst_v4 = cpu_to_be32(val); + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); + ip6_offset /= 4; + if (ip6_offset < 4) + tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); + else if (ip6_offset < 8) + tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + if (offset == offsetof(struct tcphdr, source)) + tuple->port.src = cpu_to_be16(val); + else if (offset == offsetof(struct tcphdr, dest)) + tuple->port.dst = cpu_to_be16(val); + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + if (offset == offsetof(struct udphdr, source)) + tuple->port.src = cpu_to_be16(val); + else if (offset == offsetof(struct udphdr, dest)) + tuple->port.dst = cpu_to_be16(val); + else + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int +mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv, + struct net_device *ndev) +{ + struct mlx5e_priv *other_priv = netdev_priv(ndev); + struct mlx5_core_dev *mdev = ct_priv->dev; + bool vf_rep, uplink_rep; + + vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); + uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); + + if (vf_rep) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + if (uplink_rep) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + if (is_vlan_dev(ndev)) + return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev)); + if (netif_is_macvlan(ndev)) + return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev)); + if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev)) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT; +} + +static int +mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_spec *spec, + struct flow_rule *rule) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + u16 addr_type = 0; + u8 ip_proto = 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + match.mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + match.key->ip_proto); + + ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.key->src, sizeof(match.key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.key->dst, sizeof(match.key->dst)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, sizeof(match.key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, sizeof(match.key->dst)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(match.key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(match.key->dst)); + break; + default: + break; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(match.mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(match.key->flags)); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + + if (match.key->ingress_ifindex & match.mask->ingress_ifindex) { + struct net_device *dev; + + dev = dev_get_by_index(&init_net, match.key->ingress_ifindex); + if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source)) + spec->flow_context.flow_source = + mlx5_tc_ct_get_flow_source_match(ct_priv, dev); + + dev_put(dev); + } + } + + return 0; +} + +static void +mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) +{ + if (entry->counter->is_shared && + !refcount_dec_and_test(&entry->counter->refcount)) + return; + + mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); + kfree(entry->counter); +} + +static void +mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry, + bool nat) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_flow_attr *attr = zone_rule->attr; + + ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); + + ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); + mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); + kfree(attr); +} + +static void +mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + mlx5_tc_ct_entry_del_rule(ct_priv, entry, true); + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + + atomic_dec(&ct_priv->debugfs.stats.offloaded); +} + +static struct flow_action_entry * +mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule) +{ + struct flow_action *flow_action = &flow_rule->action; + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, flow_action) { + if (act->id == FLOW_ACTION_CT_METADATA) + return act; + } + + return NULL; +} + +static int +mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts, + u8 ct_state, + u32 mark, + u32 labels_id, + u8 zone_restore_id) +{ + enum mlx5_flow_namespace_type ns = ct_priv->ns_type; + struct mlx5_core_dev *dev = ct_priv->dev; + int err; + + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + CTSTATE_TO_REG, ct_state); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + MARK_TO_REG, mark); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + LABELS_TO_REG, labels_id); + if (err) + return err; + + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + ZONE_RESTORE_TO_REG, zone_restore_id); + if (err) + return err; + + /* Make another copy of zone id in reg_b for + * NIC rx flows since we don't copy reg_c1 to + * reg_b upon miss. + */ + if (ns != MLX5_FLOW_NAMESPACE_FDB) { + err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns, + NIC_ZONE_RESTORE_TO_REG, zone_restore_id); + if (err) + return err; + } + return 0; +} + +static int +mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, + char *modact) +{ + u32 offset = act->mangle.offset, field; + + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct iphdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV4; + else if (offset == offsetof(struct iphdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV4; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + MLX5_SET(set_action_in, modact, length, 0); + if (offset == offsetof(struct ipv6hdr, saddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, saddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, saddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, saddr)) + field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96; + else if (offset == offsetof(struct ipv6hdr, daddr) + 12) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0; + else if (offset == offsetof(struct ipv6hdr, daddr) + 8) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32; + else if (offset == offsetof(struct ipv6hdr, daddr) + 4) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64; + else if (offset == offsetof(struct ipv6hdr, daddr)) + field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct tcphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT; + else if (offset == offsetof(struct tcphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT; + else + return -EOPNOTSUPP; + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + MLX5_SET(set_action_in, modact, length, 16); + if (offset == offsetof(struct udphdr, source)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT; + else if (offset == offsetof(struct udphdr, dest)) + field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT; + else + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, offset, 0); + MLX5_SET(set_action_in, modact, field, field); + MLX5_SET(set_action_in, modact, data, act->mangle.val); + + return 0; +} + +static int +mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + struct flow_action *flow_action = &flow_rule->action; + struct mlx5_core_dev *mdev = ct_priv->dev; + struct flow_action_entry *act; + char *modact; + int err, i; + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_MANGLE: { + modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts); + if (IS_ERR(modact)) + return PTR_ERR(modact); + + err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); + if (err) + return err; + + mod_acts->num_actions++; + } + break; + + case FLOW_ACTION_CT_METADATA: + /* Handled earlier */ + continue; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int +mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_attr *attr, + struct flow_rule *flow_rule, + struct mlx5e_mod_hdr_handle **mh, + u8 zone_restore_id, bool nat_table, bool has_nat) +{ + DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); + DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); + struct flow_action_entry *meta; + enum ip_conntrack_info ctinfo; + u16 ct_state = 0; + int err; + + meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta) + return -EOPNOTSUPP; + ctinfo = meta->ct_metadata.cookie & NFCT_INFOMASK; + + err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, + &attr->ct_attr.ct_labels_id); + if (err) + return -EOPNOTSUPP; + if (nat_table) { + if (has_nat) { + err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts); + if (err) + goto err_mapping; + } + + ct_state |= MLX5_CT_STATE_NAT_BIT; + } + + ct_state |= MLX5_CT_STATE_TRK_BIT; + ct_state |= ctinfo == IP_CT_NEW ? MLX5_CT_STATE_NEW_BIT : MLX5_CT_STATE_ESTABLISHED_BIT; + ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; + err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, + ct_state, + meta->ct_metadata.mark, + attr->ct_attr.ct_labels_id, + zone_restore_id); + if (err) + goto err_mapping; + + if (nat_table && has_nat) { + attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type, + mod_acts.num_actions, + mod_acts.actions); + if (IS_ERR(attr->modify_hdr)) { + err = PTR_ERR(attr->modify_hdr); + goto err_mapping; + } + + *mh = NULL; + } else { + *mh = mlx5e_mod_hdr_attach(ct_priv->dev, + ct_priv->mod_hdr_tbl, + ct_priv->ns_type, + &mod_acts); + if (IS_ERR(*mh)) { + err = PTR_ERR(*mh); + goto err_mapping; + } + attr->modify_hdr = mlx5e_mod_hdr_get(*mh); + } + + mlx5e_mod_hdr_dealloc(&mod_acts); + return 0; + +err_mapping: + mlx5e_mod_hdr_dealloc(&mod_acts); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); + return err; +} + +static void +mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_attr *attr, + struct mlx5e_mod_hdr_handle *mh) +{ + if (mh) + mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh); + else + mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr); +} + +static int +mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat, u8 zone_restore_id) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); + struct mlx5_flow_spec *spec = NULL; + struct mlx5_flow_attr *attr; + int err; + + zone_rule->nat = nat; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!attr) { + err = -ENOMEM; + goto err_attr; + } + + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, + &zone_rule->mh, + zone_restore_id, + nat, + mlx5_tc_ct_entry_has_nat(entry)); + if (err) { + ct_dbg("Failed to create ct entry mod hdr"); + goto err_mod_hdr; + } + + attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = 0; + attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); + attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; + if (entry->tuple.ip_proto == IPPROTO_TCP || + entry->tuple.ip_proto == IPPROTO_UDP) + attr->outer_match_level = MLX5_MATCH_L4; + else + attr->outer_match_level = MLX5_MATCH_L3; + attr->counter = entry->counter->counter; + attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + attr->esw_attr->in_mdev = priv->mdev; + + mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); + + zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); + if (IS_ERR(zone_rule->rule)) { + err = PTR_ERR(zone_rule->rule); + ct_dbg("Failed to add ct entry rule, nat: %d", nat); + goto err_rule; + } + + zone_rule->attr = attr; + + kvfree(spec); + ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); + + return 0; + +err_rule: + mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); +err_mod_hdr: + kfree(attr); +err_attr: + kvfree(spec); + return err; +} + +static int +mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat, u8 zone_restore_id) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr; + struct mlx5e_mod_hdr_handle *mh; + struct mlx5_ct_fs_rule *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + old_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!old_attr) { + err = -ENOMEM; + goto err_attr; + } + *old_attr = *attr; + + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id, + nat, mlx5_tc_ct_entry_has_nat(entry)); + if (err) { + ct_dbg("Failed to create ct entry mod hdr"); + goto err_mod_hdr; + } + + mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); + + rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add replacement ct entry rule, nat: %d", nat); + goto err_rule; + } + + ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); + zone_rule->rule = rule; + mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh); + zone_rule->mh = mh; + mlx5_put_label_mapping(ct_priv, old_attr->ct_attr.ct_labels_id); + + kfree(old_attr); + kvfree(spec); + ct_dbg("Replaced ct entry rule in zone %d", entry->tuple.zone); + + return 0; + +err_rule: + mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); +err_mod_hdr: + kfree(old_attr); +err_attr: + kvfree(spec); + return err; +} + +static bool +mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) +{ + return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); +} + +static struct mlx5_ct_entry * +mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) +{ + struct mlx5_ct_entry *entry; + + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, + tuples_ht_params); + if (entry && mlx5_tc_ct_entry_valid(entry) && + refcount_inc_not_zero(&entry->refcnt)) { + return entry; + } else if (!entry) { + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, + tuple, tuples_nat_ht_params); + if (entry && mlx5_tc_ct_entry_valid(entry) && + refcount_inc_not_zero(&entry->refcnt)) + return entry; + } + + return entry ? ERR_PTR(-EINVAL) : NULL; +} + +static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) +{ + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; + + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, + tuples_nat_ht_params); + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, + tuples_ht_params); +} + +static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) +{ + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; + + mlx5_tc_ct_entry_del_rules(ct_priv, entry); + + spin_lock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_remove_from_tuples(entry); + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_counter_put(ct_priv, entry); + kfree(entry); +} + +static void +mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) +{ + if (!refcount_dec_and_test(&entry->refcnt)) + return; + + mlx5_tc_ct_entry_del(entry); +} + +static void mlx5_tc_ct_entry_del_work(struct work_struct *work) +{ + struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); + + mlx5_tc_ct_entry_del(entry); +} + +static void +__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) +{ + if (!refcount_dec_and_test(&entry->refcnt)) + return; + + INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); + queue_work(entry->ct_priv->wq, &entry->work); +} + +static struct mlx5_ct_counter * +mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_ct_counter *counter; + int ret; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + counter->is_shared = false; + counter->counter = mlx5_fc_create_ex(ct_priv->dev, true); + if (IS_ERR(counter->counter)) { + ct_dbg("Failed to create counter for ct entry"); + ret = PTR_ERR(counter->counter); + kfree(counter); + return ERR_PTR(ret); + } + + return counter; +} + +static struct mlx5_ct_counter * +mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_ct_entry *entry) +{ + struct mlx5_ct_tuple rev_tuple = entry->tuple; + struct mlx5_ct_counter *shared_counter; + struct mlx5_ct_entry *rev_entry; + + /* get the reversed tuple */ + swap(rev_tuple.port.src, rev_tuple.port.dst); + + if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + __be32 tmp_addr = rev_tuple.ip.src_v4; + + rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4; + rev_tuple.ip.dst_v4 = tmp_addr; + } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct in6_addr tmp_addr = rev_tuple.ip.src_v6; + + rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6; + rev_tuple.ip.dst_v6 = tmp_addr; + } else { + return ERR_PTR(-EOPNOTSUPP); + } + + /* Use the same counter as the reverse direction */ + spin_lock_bh(&ct_priv->ht_lock); + rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); + + if (IS_ERR(rev_entry)) { + spin_unlock_bh(&ct_priv->ht_lock); + goto create_counter; + } + + if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { + ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry); + shared_counter = rev_entry->counter; + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_entry_put(rev_entry); + return shared_counter; + } + + spin_unlock_bh(&ct_priv->ht_lock); + +create_counter: + + shared_counter = mlx5_tc_ct_counter_create(ct_priv); + if (IS_ERR(shared_counter)) + return shared_counter; + + shared_counter->is_shared = true; + refcount_set(&shared_counter->refcount, 1); + return shared_counter; +} + +static int +mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + u8 zone_restore_id) +{ + int err; + + if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) + entry->counter = mlx5_tc_ct_counter_create(ct_priv); + else + entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); + + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); + return err; + } + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false, + zone_restore_id); + if (err) + goto err_orig; + + err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true, + zone_restore_id); + if (err) + goto err_nat; + + atomic_inc(&ct_priv->debugfs.stats.offloaded); + return 0; + +err_nat: + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); +err_orig: + mlx5_tc_ct_counter_put(ct_priv, entry); + return err; +} + +static int +mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + u8 zone_restore_id) +{ + int err; + + err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false, + zone_restore_id); + if (err) + return err; + + err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true, + zone_restore_id); + if (err) + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_replace(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, unsigned long cookie) +{ + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + int err; + + err = mlx5_tc_ct_entry_replace_rules(ct_priv, flow_rule, entry, ft->zone_restore_id); + if (!err) + return 0; + + /* If failed to update the entry, then look it up again under ht_lock + * protection and properly delete it. + */ + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (entry) { + rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); + spin_unlock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_put(entry); + } else { + spin_unlock_bh(&ct_priv->ht_lock); + } + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow); + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + struct flow_action_entry *meta_action; + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + int err; + + meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); + if (!meta_action) + return -EOPNOTSUPP; + + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (entry && refcount_inc_not_zero(&entry->refcnt)) { + if (entry->restore_cookie == meta_action->ct_metadata.cookie) { + spin_unlock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_put(entry); + return -EEXIST; + } + entry->restore_cookie = meta_action->ct_metadata.cookie; + spin_unlock_bh(&ct_priv->ht_lock); + + err = mlx5_tc_ct_block_flow_offload_replace(ft, flow_rule, entry, cookie); + mlx5_tc_ct_entry_put(entry); + return err; + } + spin_unlock_bh(&ct_priv->ht_lock); + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->tuple.zone = ft->zone; + entry->cookie = flow->cookie; + entry->restore_cookie = meta_action->ct_metadata.cookie; + refcount_set(&entry->refcnt, 2); + entry->ct_priv = ct_priv; + + err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); + if (err) + goto err_set; + + memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple)); + err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); + if (err) + goto err_set; + + spin_lock_bh(&ct_priv->ht_lock); + + err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node, + cts_ht_params); + if (err) + goto err_entries; + + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht, + &entry->tuple_node, + tuples_ht_params); + if (err) + goto err_tuple; + + if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, + tuples_nat_ht_params); + if (err) + goto err_tuple_nat; + } + spin_unlock_bh(&ct_priv->ht_lock); + + err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, + ft->zone_restore_id); + if (err) + goto err_rules; + + set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); + mlx5_tc_ct_entry_put(entry); /* this function reference */ + + return 0; + +err_rules: + spin_lock_bh(&ct_priv->ht_lock); + if (mlx5_tc_ct_entry_has_nat(entry)) + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, tuples_nat_ht_params); +err_tuple_nat: + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, + &entry->tuple_node, + tuples_ht_params); +err_tuple: + rhashtable_remove_fast(&ft->ct_entries_ht, + &entry->node, + cts_ht_params); +err_entries: + spin_unlock_bh(&ct_priv->ht_lock); +err_set: + kfree(entry); + if (err != -EEXIST) + netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + unsigned long cookie = flow->cookie; + struct mlx5_ct_entry *entry; + + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (!entry) { + spin_unlock_bh(&ct_priv->ht_lock); + return -ENOENT; + } + + if (!mlx5_tc_ct_entry_valid(entry)) { + spin_unlock_bh(&ct_priv->ht_lock); + return -EINVAL; + } + + rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_entry_put(entry); + + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, + struct flow_cls_offload *f) +{ + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + unsigned long cookie = f->cookie; + struct mlx5_ct_entry *entry; + u64 lastuse, packets, bytes; + + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (!entry) { + spin_unlock_bh(&ct_priv->ht_lock); + return -ENOENT; + } + + if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) { + spin_unlock_bh(&ct_priv->ht_lock); + return -EINVAL; + } + + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); + flow_stats_update(&f->stats, bytes, packets, 0, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); + + mlx5_tc_ct_entry_put(entry); + return 0; +} + +static int +mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload *f = type_data; + struct mlx5_ct_ft *ft = cb_priv; + + if (type != TC_SETUP_CLSFLOWER) + return -EOPNOTSUPP; + + switch (f->command) { + case FLOW_CLS_REPLACE: + return mlx5_tc_ct_block_flow_offload_add(ft, f); + case FLOW_CLS_DESTROY: + return mlx5_tc_ct_block_flow_offload_del(ft, f); + case FLOW_CLS_STATS: + return mlx5_tc_ct_block_flow_offload_stats(ft, f); + default: + break; + } + + return -EOPNOTSUPP; +} + +static bool +mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, + u16 zone) +{ + struct flow_keys flow_keys; + + skb_reset_network_header(skb); + skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); + + tuple->zone = zone; + + if (flow_keys.basic.ip_proto != IPPROTO_TCP && + flow_keys.basic.ip_proto != IPPROTO_UDP && + flow_keys.basic.ip_proto != IPPROTO_GRE) + return false; + + if (flow_keys.basic.ip_proto == IPPROTO_TCP || + flow_keys.basic.ip_proto == IPPROTO_UDP) { + tuple->port.src = flow_keys.ports.src; + tuple->port.dst = flow_keys.ports.dst; + } + tuple->n_proto = flow_keys.basic.n_proto; + tuple->ip_proto = flow_keys.basic.ip_proto; + + switch (flow_keys.basic.n_proto) { + case htons(ETH_P_IP): + tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; + tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; + break; + + case htons(ETH_P_IPV6): + tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; + tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; + break; + default: + goto out; + } + + return true; + +out: + return false; +} + +int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) +{ + u32 ctstate = 0, ctstate_mask = 0; + + mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, + &ctstate, &ctstate_mask); + + if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) + return -EOPNOTSUPP; + + ctstate_mask |= MLX5_CT_STATE_TRK_BIT; + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, + ctstate, ctstate_mask); + + return 0; +} + +void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) +{ + if (!priv || !ct_attr->ct_labels_id) + return; + + mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); +} + +int +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack) +{ + bool trk, est, untrk, unnew, unest, new, rpl, unrpl, rel, unrel, inv, uninv; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector_key_ct *mask, *key; + u32 ctstate = 0, ctstate_mask = 0; + u16 ct_state_on, ct_state_off; + u16 ct_state, ct_state_mask; + struct flow_match_ct match; + u32 ct_labels[4]; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + if (!priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct matching isn't available"); + return -EOPNOTSUPP; + } + + flow_rule_match_ct(rule, &match); + + key = match.key; + mask = match.mask; + + ct_state = key->ct_state; + ct_state_mask = mask->ct_state; + + if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | + TCA_FLOWER_KEY_CT_FLAGS_NEW | + TCA_FLOWER_KEY_CT_FLAGS_REPLY | + TCA_FLOWER_KEY_CT_FLAGS_RELATED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { + NL_SET_ERR_MSG_MOD(extack, + "only ct_state trk, est, new and rpl are supported for offload"); + return -EOPNOTSUPP; + } + + ct_state_on = ct_state & ct_state_mask; + ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask; + trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; + est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; + untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW; + unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID; + + ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate |= new ? MLX5_CT_STATE_NEW_BIT : 0; + ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0; + ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate_mask |= (unnew || new) ? MLX5_CT_STATE_NEW_BIT : 0; + ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; + ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; + ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0; + + if (rel) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +rel isn't supported"); + return -EOPNOTSUPP; + } + + if (inv) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +inv isn't supported"); + return -EOPNOTSUPP; + } + + if (mask->ct_zone) + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + key->ct_zone, MLX5_CT_ZONE_MASK); + if (ctstate_mask) + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, + ctstate, ctstate_mask); + if (mask->ct_mark) + mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG, + key->ct_mark, mask->ct_mark); + if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] || + mask->ct_labels[3]) { + ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0]; + ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; + ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; + ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; + if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) + return -EOPNOTSUPP; + mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, + MLX5_CT_LABELS_MASK); + } + + return 0; +} + +int +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (!priv) { + NL_SET_ERR_MSG_MOD(extack, + "offload of ct action isn't available"); + return -EOPNOTSUPP; + } + + attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */ + attr->ct_attr.zone = act->ct.zone; + if (!(act->ct.action & TCA_CT_ACT_CLEAR)) + attr->ct_attr.nf_ft = act->ct.flow_table; + attr->ct_attr.act_miss_cookie = act->miss_cookie; + + return 0; +} + +static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_flow_table *ft = pre_ct->ft; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 ctstate; + u16 zone; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + zone = ct_ft->zone & MLX5_CT_ZONE_MASK; + err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type, + ZONE_TO_REG, zone); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type, + pre_mod_acts.num_actions, + pre_mod_acts.actions); + + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct->modify_hdr = mod_hdr; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + flow_act.modify_hdr = mod_hdr; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + /* add flow rule */ + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + zone, MLX5_CT_ZONE_MASK); + ctstate = MLX5_CT_STATE_TRK_BIT; + if (nat) + ctstate |= MLX5_CT_STATE_NAT_BIT; + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); + + dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct flow rule zone %d", zone); + goto err_flow_rule; + } + pre_ct->flow_rule = rule; + + /* add miss rule */ + dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; + rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct miss rule zone %d", zone); + goto err_miss_rule; + } + pre_ct->miss_rule = rule; + + mlx5e_mod_hdr_dealloc(&pre_mod_acts); + kvfree(spec); + return 0; + +err_miss_rule: + mlx5_del_flow_rules(pre_ct->flow_rule); +err_flow_rule: + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +err_mapping: + mlx5e_mod_hdr_dealloc(&pre_mod_acts); + kvfree(spec); + return err; +} + +static void +tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->dev; + + mlx5_del_flow_rules(pre_ct->flow_rule); + mlx5_del_flow_rules(pre_ct->miss_rule); + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +} + +static int +mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 metadata_reg_c_2_mask; + u32 *flow_group_in; + void *misc; + int err; + + ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type); + if (!ns) { + err = -EOPNOTSUPP; + ct_dbg("Failed to get flow namespace"); + return err; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ? + FDB_TC_OFFLOAD : MLX5E_TC_PRIO; + ft_attr.max_fte = 2; + ft_attr.level = 1; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to create pre ct table"); + goto out_free; + } + pre_ct->ft = ft; + + /* create flow group */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria.misc_parameters_2); + + metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; + metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); + if (nat) + metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, + metadata_reg_c_2_mask); + + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct group"); + goto err_flow_grp; + } + pre_ct->flow_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct miss group"); + goto err_miss_grp; + } + pre_ct->miss_grp = g; + + err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); + if (err) + goto err_add_rules; + + kvfree(flow_group_in); + return 0; + +err_add_rules: + mlx5_destroy_flow_group(pre_ct->miss_grp); +err_miss_grp: + mlx5_destroy_flow_group(pre_ct->flow_grp); +err_flow_grp: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + tc_ct_pre_ct_del_rules(ct_ft, pre_ct); + mlx5_destroy_flow_group(pre_ct->miss_grp); + mlx5_destroy_flow_group(pre_ct->flow_grp); + mlx5_destroy_flow_table(pre_ct->ft); +} + +static int +mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + int err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); + if (err) + return err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); + if (err) + goto err_pre_ct_nat; + + return 0; + +err_pre_ct_nat: + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); +} + +/* To avoid false lock dependency warning set the ct_entries_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. + */ +static struct lock_class_key ct_entries_ht_lock_key; + +static struct mlx5_ct_ft * +mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, + struct nf_flowtable *nf_ft) +{ + struct mlx5_ct_ft *ft; + int err; + + ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params); + if (ft) { + refcount_inc(&ft->refcount); + return ft; + } + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return ERR_PTR(-ENOMEM); + + err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id); + if (err) + goto err_mapping; + + ft->zone = zone; + ft->nf_ft = nf_ft; + ft->ct_priv = ct_priv; + refcount_set(&ft->refcount, 1); + + err = mlx5_tc_ct_alloc_pre_ct_tables(ft); + if (err) + goto err_alloc_pre_ct; + + err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); + if (err) + goto err_init; + + lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); + + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, + zone_params); + if (err) + goto err_insert; + + err = nf_flow_table_offload_add_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + if (err) + goto err_add_cb; + + return ft; + +err_add_cb: + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); +err_insert: + rhashtable_destroy(&ft->ct_entries_ht); +err_init: + mlx5_tc_ct_free_pre_ct_tables(ft); +err_alloc_pre_ct: + mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); +err_mapping: + kfree(ft); + return ERR_PTR(err); +} + +static void +mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) +{ + struct mlx5_ct_entry *entry = ptr; + + mlx5_tc_ct_entry_put(entry); +} + +static void +mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +{ + if (!refcount_dec_and_test(&ft->refcount)) + return; + + flush_workqueue(ct_priv->wq); + nf_flow_table_offload_del_cb(ft->nf_ft, + mlx5_tc_ct_block_flow_offload, ft); + rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); + rhashtable_free_and_destroy(&ft->ct_entries_ht, + mlx5_tc_ct_flush_ft_entry, + ct_priv); + mlx5_tc_ct_free_pre_ct_tables(ft); + mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); + kfree(ft); +} + +/* We translate the tc filter with CT action to the following HW model: + * + * +-----------------------+ + * + rule (either original + + * + or post_act rule) + + * +-----------------------+ + * | set act_miss_cookie mapping + * | set fte_id + * | set tunnel_id + * | rest of actions before the CT action (for this orig/post_act rule) + * | + * +-------------+ + * | Chain 0 | + * | optimization| + * | v + * | +---------------------+ + * | + pre_ct/pre_ct_nat + if matches +----------------------+ + * | + zone+nat match +---------------->+ post_act (see below) + + * | +---------------------+ set zone +----------------------+ + * | | + * +-------------+ set zone + * | + * v + * +--------------------+ + * + CT (nat or no nat) + + * + tuple + zone match + + * +--------------------+ + * | set mark + * | set labels_id + * | set established + * | set zone_restore + * | do nat (if needed) + * v + * +--------------+ + * + post_act + rest of parsed filter's actions + * + fte_id match +------------------------> + * +--------------+ + * + */ +static int +__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_attr *attr) +{ + bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT; + struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev); + int act_miss_mapping = 0, err; + struct mlx5_ct_ft *ft; + u16 zone; + + /* Register for CT established events */ + ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone, + attr->ct_attr.nf_ft); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to register to ft callback"); + goto err_ft; + } + attr->ct_attr.ft = ft; + + err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie, + &act_miss_mapping); + if (err) { + ct_dbg("Failed to get register mapping for act miss"); + goto err_get_act_miss; + } + + err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts, + ct_priv->ns_type, MAPPED_OBJ_TO_REG, act_miss_mapping); + if (err) { + ct_dbg("Failed to set act miss register mapping"); + goto err_mapping; + } + + /* Chain 0 sets the zone and jumps to ct table + * Other chains jump to pre_ct table to align with act_ct cached logic + */ + if (!attr->chain) { + zone = ft->zone & MLX5_CT_ZONE_MASK; + err = mlx5e_tc_match_to_reg_set(priv->mdev, &attr->parse_attr->mod_hdr_acts, + ct_priv->ns_type, ZONE_TO_REG, zone); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; + } else { + attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft; + } + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + attr->ct_attr.act_miss_mapping = act_miss_mapping; + + return 0; + +err_mapping: + mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping); +err_get_act_miss: + mlx5_tc_ct_del_ft_cb(ct_priv, ft); +err_ft: + netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); + return err; +} + +int +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) +{ + int err; + + if (!priv) + return -EOPNOTSUPP; + + if (attr->ct_attr.offloaded) + return 0; + + if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) { + err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts, + 0, 0, 0, 0); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */ + attr->ct_attr.offloaded = true; + return 0; + } + + mutex_lock(&priv->control_lock); + err = __mlx5_tc_ct_flow_offload(priv, attr); + if (!err) + attr->ct_attr.offloaded = true; + mutex_unlock(&priv->control_lock); + + return err; +} + +static void +__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_attr *attr) +{ + mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping); + mlx5_tc_ct_del_ft_cb(ct_priv, attr->ct_attr.ft); +} + +void +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr) +{ + if (!attr->ct_attr.offloaded) /* no ct action, return */ + return; + if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ + return; + + mutex_lock(&priv->control_lock); + __mlx5_tc_ct_delete_flow(priv, attr); + mutex_unlock(&priv->control_lock); +} + +static int +mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act); + struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); + int err; + + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && + ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { + ct_dbg("Using SMFS ct flow steering provider"); + fs_ops = mlx5_ct_fs_smfs_ops_get(); + } + + ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); + if (!ct_priv->fs) + return -ENOMEM; + + ct_priv->fs->netdev = ct_priv->netdev; + ct_priv->fs->dev = ct_priv->dev; + ct_priv->fs_ops = fs_ops; + + err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct); + if (err) + goto err_init; + + return 0; + +err_init: + kfree(ct_priv->fs); + return err; +} + +static int +mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw, + const char **err_msg) +{ + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) { + /* vlan workaround should be avoided for multi chain rules. + * This is just a sanity check as pop vlan action should + * be supported by any FW that supports ignore_flow_level + */ + + *err_msg = "firmware vlan actions support is missing"; + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, + fdb_modify_header_fwd_to_table)) { + /* CT always writes to registers which are mod header actions. + * Therefore, mod header and goto is required + */ + + *err_msg = "firmware fwd and modify support is missing"; + return -EOPNOTSUPP; + } + + if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *err_msg = "register loopback isn't supported"; + return -EOPNOTSUPP; + } + + return 0; +} + +static int +mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + const char *err_msg = NULL; + int err = 0; + + if (IS_ERR_OR_NULL(post_act)) { + /* Ignore_flow_level support isn't supported by default for VFs and so post_act + * won't be supported. Skip showing error msg. + */ + if (priv->mdev->coredev_type == MLX5_COREDEV_PF) + err_msg = "post action is missing"; + err = -EOPNOTSUPP; + goto out_err; + } + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg); + +out_err: + if (err && err_msg) + netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg); + return err; +} + +static void +mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; + + ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev)); + debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, + &ct_dbgfs->stats.offloaded); + debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, + &ct_dbgfs->stats.rx_dropped); +} + +static void +mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv) +{ + debugfs_remove_recursive(ct_priv->debugfs.root); +} + +#define INIT_ERR_PREFIX "tc ct offload init failed" + +struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + struct mlx5_tc_ct_priv *ct_priv; + struct mlx5_core_dev *dev; + u64 mapping_id; + int err; + + dev = priv->mdev; + err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act); + if (err) + goto err_support; + + ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL); + if (!ct_priv) + goto err_alloc; + + mapping_id = mlx5_query_nic_system_image_guid(dev); + + ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE, + sizeof(u16), 0, true); + if (IS_ERR(ct_priv->zone_mapping)) { + err = PTR_ERR(ct_priv->zone_mapping); + goto err_mapping_zone; + } + + ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS, + sizeof(u32) * 4, 0, true); + if (IS_ERR(ct_priv->labels_mapping)) { + err = PTR_ERR(ct_priv->labels_mapping); + goto err_mapping_labels; + } + + spin_lock_init(&ct_priv->ht_lock); + ct_priv->priv = priv; + ct_priv->ns_type = ns_type; + ct_priv->chains = chains; + ct_priv->netdev = priv->netdev; + ct_priv->dev = priv->mdev; + ct_priv->mod_hdr_tbl = mod_hdr; + ct_priv->ct = mlx5_chains_create_global_table(chains); + if (IS_ERR(ct_priv->ct)) { + err = PTR_ERR(ct_priv->ct); + mlx5_core_warn(dev, + "%s, failed to create ct table err: %d\n", + INIT_ERR_PREFIX, err); + goto err_ct_tbl; + } + + ct_priv->ct_nat = mlx5_chains_create_global_table(chains); + if (IS_ERR(ct_priv->ct_nat)) { + err = PTR_ERR(ct_priv->ct_nat); + mlx5_core_warn(dev, + "%s, failed to create ct nat table err: %d\n", + INIT_ERR_PREFIX, err); + goto err_ct_nat_tbl; + } + + ct_priv->post_act = post_act; + mutex_init(&ct_priv->control_lock); + if (rhashtable_init(&ct_priv->zone_ht, &zone_params)) + goto err_ct_zone_ht; + if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params)) + goto err_ct_tuples_ht; + if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) + goto err_ct_tuples_nat_ht; + + ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0); + if (!ct_priv->wq) { + err = -ENOMEM; + goto err_wq; + } + + err = mlx5_tc_ct_fs_init(ct_priv); + if (err) + goto err_init_fs; + + mlx5_ct_tc_create_dbgfs(ct_priv); + return ct_priv; + +err_init_fs: + destroy_workqueue(ct_priv->wq); +err_wq: + rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); +err_ct_tuples_nat_ht: + rhashtable_destroy(&ct_priv->ct_tuples_ht); +err_ct_tuples_ht: + rhashtable_destroy(&ct_priv->zone_ht); +err_ct_zone_ht: + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); +err_ct_nat_tbl: + mlx5_chains_destroy_global_table(chains, ct_priv->ct); +err_ct_tbl: + mapping_destroy(ct_priv->labels_mapping); +err_mapping_labels: + mapping_destroy(ct_priv->zone_mapping); +err_mapping_zone: + kfree(ct_priv); +err_alloc: +err_support: + + return NULL; +} + +void +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_fs_chains *chains; + + if (!ct_priv) + return; + + destroy_workqueue(ct_priv->wq); + mlx5_ct_tc_remove_dbgfs(ct_priv); + chains = ct_priv->chains; + + ct_priv->fs_ops->destroy(ct_priv->fs); + kfree(ct_priv->fs); + + mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat); + mlx5_chains_destroy_global_table(chains, ct_priv->ct); + mapping_destroy(ct_priv->zone_mapping); + mapping_destroy(ct_priv->labels_mapping); + + rhashtable_destroy(&ct_priv->ct_tuples_ht); + rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); + rhashtable_destroy(&ct_priv->zone_ht); + mutex_destroy(&ct_priv->control_lock); + kfree(ct_priv); +} + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, + struct sk_buff *skb, u8 zone_restore_id) +{ + struct mlx5_ct_tuple tuple = {}; + struct mlx5_ct_entry *entry; + u16 zone; + + if (!ct_priv || !zone_restore_id) + return true; + + if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone)) + goto out_inc_drop; + + if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) + goto out_inc_drop; + + spin_lock(&ct_priv->ht_lock); + + entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); + if (!entry) { + spin_unlock(&ct_priv->ht_lock); + goto out_inc_drop; + } + + if (IS_ERR(entry)) { + spin_unlock(&ct_priv->ht_lock); + goto out_inc_drop; + } + spin_unlock(&ct_priv->ht_lock); + + tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); + __mlx5_tc_ct_entry_put(entry); + + return true; + +out_inc_drop: + atomic_inc(&ct_priv->debugfs.stats.rx_dropped); + return false; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h new file mode 100644 index 0000000000..b66c5f9806 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_CT_H__ +#define __MLX5_EN_TC_CT_H__ + +#include +#include +#include + +#include "en.h" + +struct mlx5_flow_attr; +struct mlx5e_tc_mod_hdr_acts; +struct mlx5_rep_uplink_priv; +struct mlx5e_tc_flow; +struct mlx5e_priv; + +struct mlx5_fs_chains; +struct mlx5_tc_ct_priv; +struct mlx5_ct_flow; + +struct nf_flowtable; + +struct mlx5_ct_attr { + u16 zone; + u16 ct_action; + struct nf_flowtable *nf_ft; + u32 ct_labels_id; + u32 act_miss_mapping; + u64 act_miss_cookie; + bool offloaded; + struct mlx5_ct_ft *ft; +}; + +#define zone_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 0,\ + .mlen = 16,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2),\ +} + +#define ctstate_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\ + .moffset = 16,\ + .mlen = 16,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_2),\ +} + +#define mark_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\ + .moffset = 0,\ + .mlen = 32,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_3),\ +} + +#define labels_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\ + .moffset = 0,\ + .mlen = 32,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_4),\ +} + +/* 8 LSB of metadata C5 are reserved for packet color */ +#define fteid_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\ + .moffset = 8,\ + .mlen = 24,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_5),\ +} + +#define zone_restore_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ + .moffset = 0,\ + .mlen = ESW_ZONE_ID_BITS,\ + .soffset = MLX5_BYTE_OFF(fte_match_param,\ + misc_parameters_2.metadata_reg_c_1),\ +} + +#define nic_zone_restore_to_reg_ct {\ + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\ + .moffset = 16,\ + .mlen = ESW_ZONE_ID_BITS,\ +} + +#define MLX5_CT_ZONE_BITS MLX5_REG_MAPPING_MBITS(ZONE_TO_REG) +#define MLX5_CT_ZONE_MASK MLX5_REG_MAPPING_MASK(ZONE_TO_REG) + +#if IS_ENABLED(CONFIG_MLX5_TC_CT) + +struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act); +void +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv); + +void +mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr); + +int +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack); +int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec); +int +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack); + +int +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr); + +void +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr); + +bool +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, + struct sk_buff *skb, u8 zone_restore_id); + +#else /* CONFIG_MLX5_TC_CT */ + +static inline struct mlx5_tc_ct_priv * +mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, + struct mod_hdr_tbl *mod_hdr, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act) +{ + return NULL; +} + +static inline void +mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) +{ +} + +static inline void +mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr) {} + +static inline int +mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + return -EOPNOTSUPP; +} + +static inline int +mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) +{ + return 0; +} + +static inline int +mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG_MOD(extack, "mlx5 tc ct offload isn't enabled."); + return -EOPNOTSUPP; +} + +static inline int +mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr) +{ + return -EOPNOTSUPP; +} + +static inline void +mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, + struct mlx5_flow_attr *attr) +{ +} + +static inline bool +mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, + struct sk_buff *skb, u8 zone_restore_id) +{ + if (!zone_restore_id) + return true; + + return false; +} + +#endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */ +#endif /* __MLX5_EN_TC_CT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h new file mode 100644 index 0000000000..6cc23af66b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_PRIV_H__ +#define __MLX5_EN_TC_PRIV_H__ + +#include "en_tc.h" +#include "en/tc/act/act.h" + +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) + +#define MLX5E_TC_MAX_SPLITS 1 + + +enum { + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 8, + MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 9, + MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 10, + MLX5E_TC_FLOW_FLAG_USE_ACT_STATS = MLX5E_TC_FLOW_BASE + 11, +}; + +struct mlx5e_tc_flow_parse_attr { + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct net_device *filter_dev; + struct mlx5_flow_spec spec; + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; + int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_act_parse_state parse_state; +}; + +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc); + +/* Helper struct for accessing a struct containing list_head array. + * Containing struct + * |- Helper array + * [0] Helper item 0 + * |- list_head item 0 + * |- index (0) + * [1] Helper item 1 + * |- list_head item 1 + * |- index (1) + * To access the containing struct from one of the list_head items: + * 1. Get the helper item from the list_head item using + * helper item = + * container_of(list_head item, helper struct type, list_head field) + * 2. Get the contining struct from the helper item and its index in the array: + * containing struct = + * container_of(helper item, containing struct type, helper field[index]) + */ +struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ + struct list_head list; + int index; +}; + +struct encap_route_flow_item { + struct mlx5e_route_entry *r; /* attached route instance */ + int index; +}; + +struct mlx5e_tc_flow { + struct rhash_head node; + struct mlx5e_priv *priv; + u64 cookie; + unsigned long flags; + struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + + /* flows sharing the same reformat object - currently mpls decap */ + struct list_head l3_to_l2_reformat; + struct mlx5e_decap_entry *decap_reformat; + + /* flows sharing same route entry */ + struct list_head decap_routes; + struct mlx5e_route_entry *decap_route; + struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS]; + + /* Flow can be associated with multiple encap IDs. + * The number of encaps is bounded by the number of supported + * destinations. + */ + struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ + struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer[MLX5_MAX_PORTS]; /* flows with peer flow */ + struct list_head unready; /* flows not ready to be offloaded (e.g + * due to missing route) + */ + struct list_head peer_flows; /* flows on peer */ + struct net_device *orig_dev; /* netdev adding flow first */ + int tmp_entry_index; + struct list_head tmp_list; /* temporary flow list used by neigh update */ + refcount_t refcnt; + struct rcu_head rcu_head; + struct completion init_done; + struct completion del_hw_done; + struct mlx5_flow_attr *attr; + struct list_head attrs; + u32 chain_mapping; +}; + +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer); + +struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow); + +void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow); +int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow); + +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); +bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow); +bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow); +int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow); +bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv); + +static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before setting bit. */ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); +} + +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + +static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) +{ + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); +} + +#define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before clearing bit. */ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); +} + +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags. */ + smp_mb__after_atomic(); + return ret; +} + +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow); +struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec); + +void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow); +void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); + +struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow); + +struct mlx5e_tc_int_port_priv * +mlx5e_get_int_port_priv(struct mlx5e_priv *priv); + +struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev); + +void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec); +void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec); + +#endif /* __MLX5_EN_TC_PRIV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c new file mode 100644 index 0000000000..00a04fdd75 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -0,0 +1,991 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include +#include +#include +#include +#include +#include "en/tc_tun.h" +#include "en/tc_priv.h" +#include "en_tc.h" +#include "rep/tc.h" +#include "rep/neigh.h" +#include "lag/lag.h" +#include "lag/mp.h" + +struct mlx5e_tc_tun_route_attr { + struct net_device *out_dev; + struct net_device *route_dev; + union { + struct flowi4 fl4; + struct flowi6 fl6; + } fl; + struct neighbour *n; + u8 ttl; +}; + +#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {} + +static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr) +{ + if (attr->n) + neigh_release(attr->n); + if (attr->route_dev) + dev_put(attr->route_dev); +} + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return &vxlan_tunnel; + else if (netif_is_geneve(tunnel_dev)) + return &geneve_tunnel; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return &gre_tunnel; + else if (netif_is_bareudp(tunnel_dev)) + return &mplsoudp_tunnel; + else + return NULL; +} + +static int get_route_and_out_devs(struct mlx5e_priv *priv, + struct net_device *dev, + struct net_device **route_dev, + struct net_device **out_dev) +{ + struct net_device *uplink_dev, *uplink_upper, *real_dev; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool dst_is_lag_dev; + + real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + /* mlx5_lag_is_sriov() is a blocking function which can't be called + * while holding rcu read lock. Take the net_device for correctness + * sake. + */ + if (uplink_upper) + dev_hold(uplink_upper); + rcu_read_unlock(); + + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + real_dev == uplink_upper && + mlx5_lag_is_sriov(priv->mdev)); + if (uplink_upper) + dev_put(uplink_upper); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + *route_dev = dev; + if (!netdev_port_same_parent_id(priv->netdev, real_dev) || + dst_is_lag_dev || is_vlan_dev(*route_dev) || + netif_is_ovs_master(*route_dev)) + *out_dev = uplink_dev; + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) + *out_dev = *route_dev; + else + return -EOPNOTSUPP; + + if (!mlx5e_eswitch_uplink_rep(*out_dev)) + return -EOPNOTSUPP; + + if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev && + !mlx5_lag_is_mpesw(priv->mdev)) + return -EOPNOTSUPP; + + return 0; +} + +static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, + struct net_device *dev, + struct mlx5e_tc_tun_route_attr *attr) +{ + struct net_device *route_dev; + struct net_device *out_dev; + struct neighbour *n; + struct rtable *rt; + +#if IS_ENABLED(CONFIG_INET) + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *uplink_dev; + int ret; + + if (mlx5_lag_is_multipath(mdev)) { + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + attr->fl.fl4.flowi4_oif = uplink_dev->ifindex; + } else { + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev); + + if (tunnel && tunnel->get_remote_ifindex) + attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev); + } + + rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + if (rt->rt_type != RTN_UNICAST) { + ret = -ENETUNREACH; + goto err_rt_release; + } + + if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { + ret = -ENETUNREACH; + goto err_rt_release; + } +#else + return -EOPNOTSUPP; +#endif + + ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev); + if (ret < 0) + goto err_rt_release; + dev_hold(route_dev); + + if (!attr->ttl) + attr->ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr); + if (!n) { + ret = -ENOMEM; + goto err_dev_release; + } + + ip_rt_put(rt); + attr->route_dev = route_dev; + attr->out_dev = out_dev; + attr->n = n; + return 0; + +err_dev_release: + dev_put(route_dev); +err_rt_release: + ip_rt_put(rt); + return ret; +} + +static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr) +{ + mlx5e_tc_tun_route_attr_cleanup(attr); +} + +static const char *mlx5e_netdev_kind(struct net_device *dev) +{ + if (dev->rtnl_link_ops) + return dev->rtnl_link_ops->kind; + else + return "unknown"; +} + +static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + if (!e->tunnel) { + pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n"); + return -EOPNOTSUPP; + } + + return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e); +} + +static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, + struct mlx5e_encap_entry *e, + u16 proto) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + char *ip; + + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, dev->dev_addr); + if (is_vlan_dev(dev)) { + struct vlan_hdr *vlan = (struct vlan_hdr *) + ((char *)eth + ETH_HLEN); + ip = (char *)vlan + VLAN_HLEN; + eth->h_proto = vlan_dev_vlan_proto(dev); + vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev)); + vlan->h_vlan_encapsulated_proto = htons(proto); + } else { + eth->h_proto = htons(proto); + ip = (char *)eth + ETH_HLEN; + } + + return ip; +} + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct mlx5_pkt_reformat_params reformat_params; + struct mlx5e_neigh m_neigh = {}; + TC_TUN_ROUTE_ATTR_INIT(attr); + int ipv4_encap_size; + char *encap_header; + struct iphdr *ip; + u8 nud_state; + int err; + + /* add the IP fields */ + attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; + attr.fl.fl4.daddr = tun_key->u.ipv4.dst; + attr.fl.fl4.saddr = tun_key->u.ipv4.src; + attr.ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + m_neigh.family = attr.n->ops->family; + memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); + e->out_dev = attr.out_dev; + e->route_dev_ifindex = attr.route_dev->ifindex; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev); + if (err) + goto free_encap; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = attr.ttl; + ip->daddr = attr.fl.fl4.daddr; + ip->saddr = attr.fl.fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto release_neigh; + } + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = ipv4_encap_size; + reformat_params.data = encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto destroy_neigh_entry; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv4_put(&attr); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv4_put(&attr); + return err; +} + +int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct mlx5_pkt_reformat_params reformat_params; + TC_TUN_ROUTE_ATTR_INIT(attr); + int ipv4_encap_size; + char *encap_header; + struct iphdr *ip; + u8 nud_state; + int err; + + /* add the IP fields */ + attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; + attr.fl.fl4.daddr = tun_key->u.ipv4.dst; + attr.fl.fl4.saddr = tun_key->u.ipv4.src; + attr.ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + e->route_dev_ifindex = attr.route_dev->ifindex; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = attr.ttl; + ip->daddr = attr.fl.fl4.daddr; + ip->saddr = attr.fl.fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto free_encap; + + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto release_neigh; + } + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = ipv4_encap_size; + reformat_params.data = encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto free_encap; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv4_put(&attr); + return err; + +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv4_put(&attr); + return err; +} + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) +static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, + struct net_device *dev, + struct mlx5e_tc_tun_route_attr *attr) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev); + struct net_device *route_dev; + struct net_device *out_dev; + struct dst_entry *dst; + struct neighbour *n; + int ret; + + if (tunnel && tunnel->get_remote_ifindex) + attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev); + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6, + NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + if (!attr->ttl) + attr->ttl = ip6_dst_hoplimit(dst); + + ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev); + if (ret < 0) + goto err_dst_release; + + dev_hold(route_dev); + n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr); + if (!n) { + ret = -ENOMEM; + goto err_dev_release; + } + + dst_release(dst); + attr->out_dev = out_dev; + attr->route_dev = route_dev; + attr->n = n; + return 0; + +err_dev_release: + dev_put(route_dev); +err_dst_release: + dst_release(dst); + return ret; +} + +static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr) +{ + mlx5e_tc_tun_route_attr_cleanup(attr); +} + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct mlx5_pkt_reformat_params reformat_params; + struct mlx5e_neigh m_neigh = {}; + TC_TUN_ROUTE_ATTR_INIT(attr); + struct ipv6hdr *ip6h; + int ipv6_encap_size; + char *encap_header; + u8 nud_state; + int err; + + attr.ttl = tun_key->ttl; + attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label); + attr.fl.fl6.daddr = tun_key->u.ipv6.dst; + attr.fl.fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv6_encap_size = + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct ipv6hdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + m_neigh.family = attr.n->ops->family; + memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); + e->out_dev = attr.out_dev; + e->route_dev_ifindex = attr.route_dev->ifindex; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev); + if (err) + goto free_encap; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IPV6); + + /* add ip header */ + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = attr.ttl; + ip6h->daddr = attr.fl.fl6.daddr; + ip6h->saddr = attr.fl.fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto release_neigh; + } + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = ipv6_encap_size; + reformat_params.data = encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto destroy_neigh_entry; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv6_put(&attr); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv6_put(&attr); + return err; +} + +int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct mlx5_pkt_reformat_params reformat_params; + TC_TUN_ROUTE_ATTR_INIT(attr); + struct ipv6hdr *ip6h; + int ipv6_encap_size; + char *encap_header; + u8 nud_state; + int err; + + attr.ttl = tun_key->ttl; + + attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label); + attr.fl.fl6.daddr = tun_key->u.ipv6.dst; + attr.fl.fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv6_encap_size = + (is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct ipv6hdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + e->route_dev_ifindex = attr.route_dev->ifindex; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IPV6); + + /* add ip header */ + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = attr.ttl; + ip6h->daddr = attr.fl.fl6.daddr; + ip6h->saddr = attr.fl.fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto free_encap; + + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto release_neigh; + } + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = ipv6_encap_size; + reformat_params.data = encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto free_encap; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv6_put(&attr); + return err; + +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv6_put(&attr); + return err; +} +#endif + +int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *flow_attr, + struct net_device *filter_dev) +{ + struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_int_port *int_port; + TC_TUN_ROUTE_ATTR_INIT(attr); + u16 vport_num; + int err = 0; + + if (flow_attr->tun_ip_version == 4) { + /* Addresses are swapped for decap */ + attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4; + attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4; + err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr); + } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (flow_attr->tun_ip_version == 6) { + /* Addresses are swapped for decap */ + attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6; + attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6; + err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr); + } +#endif + else + return 0; + + if (err) + return err; + + if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops && + mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) { + err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num); + if (err) + goto out; + + esw_attr->rx_tun_attr->decap_vport = vport_num; + } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) { + int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), + attr.route_dev->ifindex, + MLX5E_TC_INT_PORT_INGRESS); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto out; + } + esw_attr->int_port = int_port; + } + +out: + if (flow_attr->tun_ip_version == 4) + mlx5e_route_lookup_ipv4_put(&attr); +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (flow_attr->tun_ip_version == 6) + mlx5e_route_lookup_ipv6_put(&attr); +#endif + return err; +} + +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev); + + if (tunnel && tunnel->can_offload(priv)) + return true; + else + return false; +} + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev); + + if (!tunnel) { + e->reformat_type = -1; + return -EOPNOTSUPP; + } + + return tunnel->init_encap_attr(tunnel_dev, priv, e, extack); +} + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + u8 *match_level) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + struct netlink_ext_ack *extack = f->common.extack; + int err = 0; + + if (!tunnel) { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device\n", + mlx5e_netdev_kind(filter_dev)); + err = -EOPNOTSUPP; + goto out; + } + + *match_level = tunnel->match_level; + + if (tunnel->parse_udp_ports) { + err = tunnel->parse_udp_ports(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (tunnel->parse_tunnel) { + err = tunnel->parse_tunnel(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_basic key_basic = {}; + struct flow_dissector_key_basic mask_basic = { + .n_proto = htons(0xFFFF), + }; + struct flow_match_basic match_basic = { + .key = &key_basic, .mask = &mask_basic, + }; + struct flow_match_control match; + u16 addr_type; + + flow_rule_match_enc_control(rule, &match); + addr_type = match.key->addr_type; + + /* For tunnel addr_type used same key id`s as for non-tunnel */ + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(match.key->dst)); + + key_basic.n_proto = htons(ETH_P_IP); + mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true, + headers_c, headers_v); + } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, + ipv6)); + + key_basic.n_proto = htons(ETH_P_IPV6); + mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true, + headers_c, headers_v); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB + (priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + err = -EOPNOTSUPP; + goto out; + } + } + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; + +out: + return err; +} + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + /* Full udp dst port must be given */ + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + NL_SET_ERR_MSG_MOD(extack, + "UDP tunnel decap filter must include enc_dst_port condition"); + netdev_warn(priv->netdev, + "UDP tunnel decap filter must include enc_dst_port condition\n"); + return -EOPNOTSUPP; + } + + flow_rule_match_enc_ports(rule, &enc_ports); + + if (memchr_inv(&enc_ports.mask->dst, 0xff, + sizeof(enc_ports.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "UDP tunnel decap filter must match enc_dst_port fully"); + netdev_warn(priv->netdev, + "UDP tunnel decap filter must match enc_dst_port fully\n"); + return -EOPNOTSUPP; + } + + /* match on UDP protocol and dst port number */ + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(enc_ports.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(enc_ports.key->dst)); + + /* UDP src port on outer header is generated by HW, + * so it is probably a bad idea to request matching it. + * Nonetheless, it is allowed. + */ + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(enc_ports.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(enc_ports.key->src)); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h new file mode 100644 index 0000000000..92065568bb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_TUNNEL_H__ +#define __MLX5_EN_TC_TUNNEL_H__ + +#include +#include +#include +#include +#include "en.h" +#include "en_rep.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5E_TC_TUNNEL_TYPE_UNKNOWN, + MLX5E_TC_TUNNEL_TYPE_VXLAN, + MLX5E_TC_TUNNEL_TYPE_GENEVE, + MLX5E_TC_TUNNEL_TYPE_GRETAP, + MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, +}; + +struct mlx5e_encap_key { + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; +}; + +struct mlx5e_tc_tunnel { + int tunnel_type; + enum mlx5_flow_match_level match_level; + + bool (*can_offload)(struct mlx5e_priv *priv); + int (*calc_hlen)(struct mlx5e_encap_entry *e); + int (*init_encap_attr)(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + int (*generate_ip_tun_hdr)(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e); + int (*parse_udp_ports)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + int (*parse_tunnel)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + bool (*encap_info_equal)(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); + int (*get_remote_ifindex)(struct net_device *mirred_dev); +}; + +extern struct mlx5e_tc_tunnel vxlan_tunnel; +extern struct mlx5e_tc_tunnel geneve_tunnel; +extern struct mlx5e_tc_tunnel gre_tunnel; +extern struct mlx5e_tc_tunnel mplsoudp_tunnel; + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); +int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); +int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); +#else +static inline int +mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ return -EOPNOTSUPP; } +static inline int +mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ return -EOPNOTSUPP; } +#endif +int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct net_device *filter_dev); + +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev); + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + u8 *match_level); + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); + +bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b, + __be16 tun_flags); +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c new file mode 100644 index 0000000000..f1d1e1542e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -0,0 +1,1885 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include +#include +#include +#include "tc_tun_encap.h" +#include "en_tc.h" +#include "tc_tun.h" +#include "rep/tc.h" +#include "diag/en_tc_tracepoint.h" + +enum { + MLX5E_ROUTE_ENTRY_VALID = BIT(0), +}; + +static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct mlx5e_encap_entry *e, + int out_index) +{ + struct net_device *route_dev; + int err = 0; + + route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); + + if (!route_dev || !netif_is_ovs_master(route_dev) || + attr->parse_attr->filter_dev == e->out_dev) + goto out; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, + MLX5E_TC_INT_PORT_EGRESS, + &attr->action, out_index); + +out: + if (route_dev) + dev_put(route_dev); + + return err; +} + +struct mlx5e_route_key { + int ip_version; + union { + __be32 v4; + struct in6_addr v6; + } endpoint_ip; +}; + +struct mlx5e_route_entry { + struct mlx5e_route_key key; + struct list_head encap_entries; + struct list_head decap_flows; + u32 flags; + struct hlist_node hlist; + refcount_t refcnt; + int tunnel_dev_index; + struct rcu_head rcu; +}; + +struct mlx5e_tc_tun_encap { + struct mlx5e_priv *priv; + struct notifier_block fib_nb; + spinlock_t route_lock; /* protects route_tbl */ + unsigned long route_tbl_last_update; + DECLARE_HASHTABLE(route_tbl, 8); +}; + +static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r) +{ + return r->flags & MLX5E_ROUTE_ENTRY_VALID; +} + +int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec) +{ + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + struct mlx5_rx_tun_attr *tun_attr; + void *daddr, *saddr; + u8 ip_version; + + tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL); + if (!tun_attr) + return -ENOMEM; + + esw_attr->rx_tun_attr = tun_attr; + ip_version = mlx5e_tc_get_ip_version(spec, true); + + if (ip_version == 4) { + daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + tun_attr->dst_ip.v4 = *(__be32 *)daddr; + tun_attr->src_ip.v4 = *(__be32 *)saddr; + if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4) + return 0; + } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (ip_version == 6) { + int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); + + daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6); + saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6); + memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size); + memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size); + if (ipv6_addr_any(&tun_attr->dst_ip.v6) || + ipv6_addr_any(&tun_attr->src_ip.v6)) + return 0; + } +#endif + /* Only set the flag if both src and dst ip addresses exist. They are + * required to establish routing. + */ + flow_flag_set(flow, TUN_RX); + flow->attr->tun_ip_version = ip_version; + return 0; +} + +static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr) +{ + bool all_flow_encaps_valid = true; + int i; + + /* Flow can be associated with multiple encap entries. + * Before offloading the flow verify that all of them have + * a valid neighbour. + */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) + continue; + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { + all_flow_encaps_valid = false; + break; + } + } + + return all_flow_encaps_valid; +} + +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_pkt_reformat_params reformat_params; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + struct mlx5e_tc_flow *flow; + int err; + + if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE) + return; + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", + PTR_ERR(e->pkt_reformat)); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW)) + continue; + + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + + /* Do not offload flows with unresolved neighbors */ + if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) + continue; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + continue; + } + + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); + if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + + mlx5e_tc_unoffload_from_slow_path(esw, flow); + flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + struct mlx5e_tc_flow *flow; + int err; + + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow)) + continue; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + /* mark the flow's encap dest as non-valid */ + esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + + /* Clear pkt_reformat before checking slow path flag. Because + * in next iteration, the same flow is already set slow path + * flag, but still need to clear the pkt_reformat. + */ + if (flow_flag_test(flow, SLOW)) + continue; + + /* update from encap rule to slow path rule */ + spec = &flow->attr->parse_attr->spec; + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + continue; + } + + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5e_tc_unoffload_flow_post_acts(flow); + flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); + } + + /* we know that the encap is valid */ + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; +} + +static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, + struct list_head *flow_list, + int index) +{ + if (IS_ERR(mlx5e_flow_get(flow))) { + /* Flow is being deleted concurrently. Wait for it to be + * unoffloaded from hardware, otherwise deleting encap will + * fail. + */ + wait_for_completion(&flow->del_hw_done); + return; + } + wait_for_completion(&flow->init_done); + + flow->tmp_entry_index = index; + list_add(&flow->tmp_list, flow_list); +} + +/* Takes reference to all flows attached to encap and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. + */ +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list) +{ + struct encap_flow_item *efi; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + mlx5e_take_tmp_flow(flow, flow_list, efi->index); + } +} + +/* Takes reference to all flows attached to route and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. + */ +static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r, + struct list_head *flow_list) +{ + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, &r->decap_flows, decap_routes) + mlx5e_take_tmp_flow(flow, flow_list, 0); +} + +typedef bool (match_cb)(struct mlx5e_encap_entry *); + +static struct mlx5e_encap_entry * +mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e, + match_cb match) +{ + struct mlx5e_encap_entry *next = NULL; + +retry: + rcu_read_lock(); + + /* find encap with non-zero reference counter value */ + for (next = e ? + list_next_or_null_rcu(&nhe->encap_list, + &e->encap_list, + struct mlx5e_encap_entry, + encap_list) : + list_first_or_null_rcu(&nhe->encap_list, + struct mlx5e_encap_entry, + encap_list); + next; + next = list_next_or_null_rcu(&nhe->encap_list, + &next->encap_list, + struct mlx5e_encap_entry, + encap_list)) + if (mlx5e_encap_take(next)) + break; + + rcu_read_unlock(); + + /* release starting encap */ + if (e) + mlx5e_encap_put(netdev_priv(e->out_dev), e); + if (!next) + return next; + + /* wait for encap to be fully initialized */ + wait_for_completion(&next->res_ready); + /* continue searching if encap entry is not in valid state after completion */ + if (!match(next)) { + e = next; + goto retry; + } + + return next; +} + +static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) +{ + return e->flags & MLX5_ENCAP_ENTRY_VALID; +} + +static struct mlx5e_encap_entry * +mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); +} + +static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) +{ + return e->compl_result >= 0; +} + +struct mlx5e_encap_entry * +mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + struct mlx5e_encap_entry *e = NULL; + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + u64 lastuse; + + if (m_neigh->family == AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = ipv6_stub->nd_tbl; +#endif + else + return; + + /* mlx5e_get_next_valid_encap() releases previous encap before returning + * next one. + */ + while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { + struct mlx5e_priv *priv = netdev_priv(e->out_dev); + struct encap_flow_item *efi, *tmp; + struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + list_for_each_entry_safe(efi, tmp, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, + encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + list_add(&flow->tmp_list, &flow_list); + + if (mlx5e_is_offloaded_flow(flow)) { + counter = mlx5e_tc_get_counter(flow); + lastuse = mlx5_fc_query_lastuse(counter); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_put_flow_list(priv, &flow_list); + if (neigh_used) { + /* release current encap before breaking the loop */ + mlx5e_encap_put(priv, e); + break; + } + } + + trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used); + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev)); + if (!n) + return; + + neigh_event_send(n, NULL); + neigh_release(n); + } +} + +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + WARN_ON(!list_empty(&e->flows)); + + if (e->compl_result > 0) { + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + } + + kfree(e->tun_info); + kfree(e->encap_header); + kfree_rcu(e, rcu); +} + +static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, + struct mlx5e_decap_entry *d) +{ + WARN_ON(!list_empty(&d->flows)); + + if (!d->compl_result) + mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); + + kfree_rcu(d, rcu); +} + +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + + if (!refcount_dec_and_test(&e->refcnt)) + return; + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mlx5e_encap_dealloc(priv, e); +} + +static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) + return; + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + +static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index); + +void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index) +{ + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!mlx5e_is_eswitch_flow(flow)) + return; + + if (attr->esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + mlx5e_detach_encap_route(priv, flow, out_index); + + /* flow wasn't fully initialized */ + if (!e) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_entry *d = flow->decap_reformat; + + if (!d) + return; + + mutex_lock(&esw->offloads.decap_tbl_lock); + list_del(&flow->l3_to_l2_reformat); + flow->decap_reformat = NULL; + + if (!refcount_dec_and_test(&d->refcnt)) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + return; + } + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 && + a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; +} + +bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b, + __be16 tun_flags) +{ + struct ip_tunnel_info *a_info; + struct ip_tunnel_info *b_info; + bool a_has_opts, b_has_opts; + + if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) + return false; + + a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags); + b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags); + + /* keys are equal when both don't have any options attached */ + if (!a_has_opts && !b_has_opts) + return true; + + if (a_has_opts != b_has_opts) + return false; + + /* options stored in memory next to ip_tunnel_info struct */ + a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key); + b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key); + + return a_info->options_len == b_info->options_len && + !memcmp(ip_tunnel_info_opts(a_info), + ip_tunnel_info_opts(b_info), + a_info->options_len); +} + +static int cmp_decap_info(struct mlx5e_decap_key *a, + struct mlx5e_decap_key *b) +{ + return memcmp(&a->key, &b->key, sizeof(b->key)); +} + +static int hash_encap_info(struct mlx5e_encap_key *key) +{ + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tc_tunnel->tunnel_type); +} + +static int hash_decap_info(struct mlx5e_decap_key *key) +{ + return jhash(&key->key, sizeof(key->key), 0); +} + +bool mlx5e_encap_take(struct mlx5e_encap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static struct mlx5e_encap_entry * +mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_key e_key; + struct mlx5e_encap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (e->tunnel->encap_info_equal(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; +} + +static struct mlx5e_decap_entry * +mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_key r_key; + struct mlx5e_decap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, + hlist, hash_key) { + r_key = e->key; + if (!cmp_decap_info(&r_key, key) && + mlx5e_decap_take(e)) + return e; + } + return NULL; +} + +struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info) +{ + size_t tun_size = sizeof(*tun_info) + tun_info->options_len; + + return kmemdup(tun_info, tun_size, GFP_KERNEL); +} + +static bool is_duplicated_encap_entry(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < out_index; i++) { + if (flow->encaps[i].e != e) + continue; + NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action"); + netdev_err(priv->netdev, "can't duplicate encap action\n"); + return true; + } + + return false; +} + +static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + struct net_device *out_dev, + int route_dev_ifindex, + int out_index) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *route_dev; + u16 vport_num; + int err = 0; + u32 data; + + route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); + + if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops || + !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) + goto out; + + err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num); + if (err) + goto out; + + attr->dest_chain = 0; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; + data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch, + vport_num); + err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, + VPORT_TO_REG, data); + if (err >= 0) { + esw_attr->dests[out_index].src_port_rewrite_act_id = err; + err = 0; + } + +out: + if (route_dev) + dev_put(route_dev); + return err; +} + +static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + struct net_device *out_dev, + int route_dev_ifindex, + int out_index) +{ + int act_id = attr->dests[out_index].src_port_rewrite_act_id; + struct net_device *route_dev; + u16 vport_num; + int err = 0; + u32 data; + + route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); + + if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops || + !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) { + err = -ENODEV; + goto out; + } + + err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num); + if (err) + goto out; + + data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch, + vport_num); + mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data); + +out: + if (route_dev) + dev_put(route_dev); + return err; +} + +static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_tun_encap *encap; + unsigned int ret; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + encap = uplink_priv->encap; + + spin_lock_bh(&encap->route_lock); + ret = encap->route_tbl_last_update; + spin_unlock_bh(&encap->route_lock); + return ret; +} + +static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5e_encap_entry *e, + bool new_encap_entry, + unsigned long tbl_time_before, + int out_index); + +int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, + struct net_device **encap_dev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; + const struct ip_tunnel_info *tun_info; + const struct mlx5e_mpls_info *mpls_info; + unsigned long tbl_time_before = 0; + struct mlx5e_encap_entry *e; + struct mlx5e_encap_key key; + bool entry_created = false; + unsigned short family; + uintptr_t hash_key; + int err = 0; + + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + + parse_attr = attr->parse_attr; + tun_info = parse_attr->tun_info[out_index]; + mpls_info = &parse_attr->mpls_info[out_index]; + family = ip_tunnel_info_af(tun_info); + key.ip_tun_key = &tun_info->key; + key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); + if (!key.tc_tunnel) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel"); + return -EOPNOTSUPP; + } + + hash_key = hash_encap_info(&key); + + e = mlx5e_encap_get(priv, &key, hash_key); + + /* must verify if encap is valid or not */ + if (e) { + /* Check that entry was not already attached to this flow */ + if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) { + err = -EOPNOTSUPP; + goto out_err; + } + + goto attach_flow; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { + err = -ENOMEM; + goto out_err; + } + + refcount_set(&e->refcnt, 1); + init_completion(&e->res_ready); + entry_created = true; + INIT_LIST_HEAD(&e->route_list); + + tun_info = mlx5e_dup_tun_info(tun_info); + if (!tun_info) { + err = -ENOMEM; + goto out_err_init; + } + e->tun_info = tun_info; + memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info)); + err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); + if (err) + goto out_err_init; + + INIT_LIST_HEAD(&e->flows); + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + tbl_time_before = mlx5e_route_tbl_get_last_update(priv); + + if (family == AF_INET) + err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); + else if (family == AF_INET6) + err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); + + complete_all(&e->res_ready); + if (err) { + e->compl_result = err; + goto out_err; + } + e->compl_result = 1; + +attach_flow: + err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created, + tbl_time_before, out_index); + if (err) + goto out_err; + + err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index); + if (err == -EOPNOTSUPP) { + /* If device doesn't support int port offload, + * redirect to uplink vport. + */ + mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n"); + err = 0; + } else if (err) { + goto out_err; + } + + flow->encaps[out_index].e = e; + list_add(&flow->encaps[out_index].list, &e->flows); + flow->encaps[out_index].index = out_index; + *encap_dev = e->out_dev; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; + attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + } else { + flow_flag_set(flow, SLOW); + } + + return err; + +out_err: + if (e) + mlx5e_encap_put_locked(priv, e); + return err; + +out_err_init: + kfree(tun_info); + kfree(e); + return err; +} + +int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; + struct mlx5_pkt_reformat_params reformat_params; + struct mlx5e_decap_entry *d; + struct mlx5e_decap_key key; + uintptr_t hash_key; + int err = 0; + + if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + NL_SET_ERR_MSG_MOD(extack, + "encap header larger than max supported"); + return -EOPNOTSUPP; + } + + key.key = attr->eth; + hash_key = hash_decap_info(&key); + mutex_lock(&esw->offloads.decap_tbl_lock); + d = mlx5e_decap_get(priv, &key, hash_key); + if (d) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + wait_for_completion(&d->res_ready); + mutex_lock(&esw->offloads.decap_tbl_lock); + if (d->compl_result) { + err = -EREMOTEIO; + goto out_free; + } + goto found; + } + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + err = -ENOMEM; + goto out_err; + } + + d->key = key; + refcount_set(&d->refcnt, 1); + init_completion(&d->res_ready); + INIT_LIST_HEAD(&d->flows); + hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + reformat_params.size = sizeof(attr->eth); + reformat_params.data = &attr->eth; + d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(d->pkt_reformat)) { + err = PTR_ERR(d->pkt_reformat); + d->compl_result = err; + } + mutex_lock(&esw->offloads.decap_tbl_lock); + complete_all(&d->res_ready); + if (err) + goto out_free; + +found: + flow->decap_reformat = d; + attr->decap_pkt_reformat = d->pkt_reformat; + list_add(&flow->l3_to_l2_reformat, &d->flows); + mutex_unlock(&esw->offloads.decap_tbl_lock); + return 0; + +out_free: + mutex_unlock(&esw->offloads.decap_tbl_lock); + mlx5e_decap_put(priv, d); + return err; + +out_err: + mutex_unlock(&esw->offloads.decap_tbl_lock); + return err; +} + +int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *vf_tun) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *encap_dev = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch *esw; + int out_index; + int err = 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + *vf_tun = false; + + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; + int mirred_ifindex; + + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto out; + } + err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, + extack, &encap_dev); + dev_put(out_dev); + if (err) + goto out; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[out_index].vport_valid = true; + esw_attr->dests[out_index].vport = rpriv->rep->vport; + esw_attr->dests[out_index].mdev = out_priv->mdev; + } + + if (*vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + mutex_unlock(&esw->offloads.encap_tbl_lock); + return err; +} + +void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr; + int out_index; + + if (!mlx5e_is_eswitch_flow(flow)) + return; + + esw_attr = attr->esw_attr; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mlx5e_detach_encap(flow->priv, flow, attr, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } +} + +static int cmp_route_info(struct mlx5e_route_key *a, + struct mlx5e_route_key *b) +{ + if (a->ip_version == 4 && b->ip_version == 4) + return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4, + sizeof(a->endpoint_ip.v4)); + else if (a->ip_version == 6 && b->ip_version == 6) + return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6, + sizeof(a->endpoint_ip.v6)); + return 1; +} + +static u32 hash_route_info(struct mlx5e_route_key *key) +{ + if (key->ip_version == 4) + return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0); + return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0); +} + +static void mlx5e_route_dealloc(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r) +{ + WARN_ON(!list_empty(&r->decap_flows)); + WARN_ON(!list_empty(&r->encap_entries)); + + kfree_rcu(r, rcu); +} + +static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock)) + return; + + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + + if (!refcount_dec_and_test(&r->refcnt)) + return; + hash_del_rcu(&r->hlist); + mlx5e_route_dealloc(priv, r); +} + +static struct mlx5e_route_entry * +mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key, + u32 hash_key) +{ + struct mlx5e_route_key r_key; + struct mlx5e_route_entry *r; + + hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) { + r_key = r->key; + if (!cmp_route_info(&r_key, key) && + refcount_inc_not_zero(&r->refcnt)) + return r; + } + return NULL; +} + +static struct mlx5e_route_entry * +mlx5e_route_get_create(struct mlx5e_priv *priv, + struct mlx5e_route_key *key, + int tunnel_dev_index, + unsigned long *route_tbl_change_time) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_tun_encap *encap; + struct mlx5e_route_entry *r; + u32 hash_key; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + encap = uplink_priv->encap; + + hash_key = hash_route_info(key); + spin_lock_bh(&encap->route_lock); + r = mlx5e_route_get(encap, key, hash_key); + spin_unlock_bh(&encap->route_lock); + if (r) { + if (!mlx5e_route_entry_valid(r)) { + mlx5e_route_put_locked(priv, r); + return ERR_PTR(-EINVAL); + } + return r; + } + + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (!r) + return ERR_PTR(-ENOMEM); + + r->key = *key; + r->flags |= MLX5E_ROUTE_ENTRY_VALID; + r->tunnel_dev_index = tunnel_dev_index; + refcount_set(&r->refcnt, 1); + INIT_LIST_HEAD(&r->decap_flows); + INIT_LIST_HEAD(&r->encap_entries); + + spin_lock_bh(&encap->route_lock); + *route_tbl_change_time = encap->route_tbl_last_update; + hash_add(encap->route_tbl, &r->hlist, hash_key); + spin_unlock_bh(&encap->route_lock); + + return r; +} + +static struct mlx5e_route_entry * +mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key) +{ + u32 hash_key = hash_route_info(key); + struct mlx5e_route_entry *r; + + spin_lock_bh(&encap->route_lock); + encap->route_tbl_last_update = jiffies; + r = mlx5e_route_get(encap, key, hash_key); + spin_unlock_bh(&encap->route_lock); + + return r; +} + +struct mlx5e_tc_fib_event_data { + struct work_struct work; + unsigned long event; + struct mlx5e_route_entry *r; + struct net_device *ul_dev; +}; + +static void mlx5e_tc_fib_event_work(struct work_struct *work); +static struct mlx5e_tc_fib_event_data * +mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags) +{ + struct mlx5e_tc_fib_event_data *fib_work; + + fib_work = kzalloc(sizeof(*fib_work), flags); + if (WARN_ON(!fib_work)) + return NULL; + + INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work); + fib_work->event = event; + fib_work->ul_dev = ul_dev; + + return fib_work; +} + +static int +mlx5e_route_enqueue_update(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + unsigned long event) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_rep_priv *uplink_rpriv; + struct net_device *ul_dev; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + ul_dev = uplink_rpriv->netdev; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL); + if (!fib_work) + return -ENOMEM; + + dev_hold(ul_dev); + refcount_inc(&r->refcnt); + fib_work->r = r; + queue_work(priv->wq, &fib_work->work); + + return 0; +} + +int mlx5e_attach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned long tbl_time_before, tbl_time_after; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + int err = 0; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + mutex_lock(&esw->offloads.encap_tbl_lock); + if (!esw_attr->rx_tun_attr) + goto out; + + tbl_time_before = mlx5e_route_tbl_get_last_update(priv); + tbl_time_after = tbl_time_before; + err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev); + if (err || !esw_attr->rx_tun_attr->decap_vport) + goto out; + + key.ip_version = attr->tun_ip_version; + if (key.ip_version == 4) + key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4; + else + key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6; + + r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex, + &tbl_time_after); + if (IS_ERR(r)) { + err = PTR_ERR(r); + goto out; + } + /* Routing changed concurrently. FIB event handler might have missed new + * entry, schedule update. + */ + if (tbl_time_before != tbl_time_after) { + err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); + if (err) { + mlx5e_route_put_locked(priv, r); + goto out; + } + } + + flow->decap_route = r; + list_add(&flow->decap_routes, &r->decap_flows); + mutex_unlock(&esw->offloads.encap_tbl_lock); + return 0; + +out: + mutex_unlock(&esw->offloads.encap_tbl_lock); + return err; +} + +static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct mlx5e_encap_entry *e, + bool new_encap_entry, + unsigned long tbl_time_before, + int out_index) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned long tbl_time_after = tbl_time_before; + struct mlx5e_tc_flow_parse_attr *parse_attr; + const struct ip_tunnel_info *tun_info; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + unsigned short family; + int err = 0; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + tun_info = parse_attr->tun_info[out_index]; + family = ip_tunnel_info_af(tun_info); + + if (family == AF_INET) { + key.endpoint_ip.v4 = tun_info->key.u.ipv4.src; + key.ip_version = 4; + } else if (family == AF_INET6) { + key.endpoint_ip.v6 = tun_info->key.u.ipv6.src; + key.ip_version = 6; + } + + err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev, + e->route_dev_ifindex, out_index); + if (err || !(esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)) + return err; + + r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index], + &tbl_time_after); + if (IS_ERR(r)) + return PTR_ERR(r); + /* Routing changed concurrently. FIB event handler might have missed new + * entry, schedule update. + */ + if (tbl_time_before != tbl_time_after) { + err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); + if (err) { + mlx5e_route_put_locked(priv, r); + return err; + } + } + + flow->encap_routes[out_index].r = r; + if (new_encap_entry) + list_add(&e->route_list, &r->encap_entries); + flow->encap_routes[out_index].index = out_index; + return 0; +} + +void mlx5e_detach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_route_entry *r = flow->decap_route; + + if (!r) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->decap_routes); + flow->decap_route = NULL; + + if (!refcount_dec_and_test(&r->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index) +{ + struct mlx5e_route_entry *r = flow->encap_routes[out_index].r; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e, *tmp; + + if (!r) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + flow->encap_routes[out_index].r = NULL; + + if (!refcount_dec_and_test(&r->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list) + list_del_init(&e->route_list); + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *encap_flows) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, encap_flows, tmp_list) { + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; + + if (!mlx5e_is_offloaded_flow(flow)) + continue; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + + if (flow_flag_test(flow, SLOW)) { + mlx5e_tc_unoffload_from_slow_path(esw, flow); + } else { + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5e_tc_unoffload_flow_post_acts(flow); + } + + mlx5e_tc_detach_mod_hdr(priv, flow, attr); + attr->modify_hdr = NULL; + + esw_attr->dests[flow->tmp_entry_index].flags &= + ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + } + + e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; + } +} + +static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, + struct net_device *tunnel_dev, + struct mlx5e_encap_entry *e, + struct list_head *encap_flows) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + int err; + + err = ip_tunnel_info_af(e->tun_info) == AF_INET ? + mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) : + mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err); + e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE; + + list_for_each_entry(flow, encap_flows, tmp_list) { + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + + if (flow_flag_test(flow, FAILED)) + continue; + + spec = &flow->attr->parse_attr->spec; + + attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + + err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts, + e->out_dev, e->route_dev_ifindex, + flow->tmp_entry_index); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err); + continue; + } + + err = mlx5e_tc_attach_mod_hdr(priv, flow, attr); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", + err); + continue; + } + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) + goto offload_to_slow_path; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n", + err); + goto offload_to_slow_path; + } + + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr); + if (IS_ERR(rule)) { + mlx5e_tc_unoffload_flow_post_acts(flow); + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + } else { + flow->rule[0] = rule; + } + } else { +offload_to_slow_path: + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + /* mark the flow's encap dest as non-valid */ + esw_attr->dests[flow->tmp_entry_index].flags &= + ~MLX5_ESW_DEST_ENCAP_VALID; + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + } else { + flow->rule[0] = rule; + } + } + flow_flag_set(flow, OFFLOADED); + } +} + +static int mlx5e_update_route_encaps(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + struct list_head *flow_list, + bool replace) +{ + struct net_device *tunnel_dev; + struct mlx5e_encap_entry *e; + + tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); + if (!tunnel_dev) + return -ENODEV; + + list_for_each_entry(e, &r->encap_entries, route_list) { + LIST_HEAD(encap_flows); + + mlx5e_take_all_encap_flows(e, &encap_flows); + if (list_empty(&encap_flows)) + continue; + + if (mlx5e_route_entry_valid(r)) + mlx5e_invalidate_encap(priv, e, &encap_flows); + + if (!replace) { + list_splice(&encap_flows, flow_list); + continue; + } + + mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows); + list_splice(&encap_flows, flow_list); + } + + return 0; +} + +static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, flow_list, tmp_list) + if (mlx5e_is_offloaded_flow(flow)) + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); +} + +static void mlx5e_reoffload_decap(struct mlx5e_priv *priv, + struct list_head *decap_flows) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, decap_flows, tmp_list) { + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + if (flow_flag_test(flow, FAILED)) + continue; + + parse_attr = attr->parse_attr; + spec = &parse_attr->spec; + err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n", + err); + continue; + } + + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n", + err); + } else { + flow->rule[0] = rule; + flow_flag_set(flow, OFFLOADED); + } + } +} + +static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + struct list_head *flow_list, + bool replace) +{ + struct net_device *tunnel_dev; + LIST_HEAD(decap_flows); + + tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); + if (!tunnel_dev) + return -ENODEV; + + mlx5e_take_all_route_decap_flows(r, &decap_flows); + if (mlx5e_route_entry_valid(r)) + mlx5e_unoffload_flow_list(priv, &decap_flows); + if (replace) + mlx5e_reoffload_decap(priv, &decap_flows); + + list_splice(&decap_flows, flow_list); + + return 0; +} + +static void mlx5e_tc_fib_event_work(struct work_struct *work) +{ + struct mlx5e_tc_fib_event_data *event_data = + container_of(work, struct mlx5e_tc_fib_event_data, work); + struct net_device *ul_dev = event_data->ul_dev; + struct mlx5e_priv *priv = netdev_priv(ul_dev); + struct mlx5e_route_entry *r = event_data->r; + struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + bool replace; + int err; + + /* sync with concurrent neigh updates */ + rtnl_lock(); + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + replace = event_data->event == FIB_EVENT_ENTRY_REPLACE; + + if (!mlx5e_route_entry_valid(r) && !replace) + goto out; + + err = mlx5e_update_route_encaps(priv, r, &flow_list, replace); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n", + err); + + err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n", + err); + + if (replace) + r->flags |= MLX5E_ROUTE_ENTRY_VALID; +out: + mutex_unlock(&esw->offloads.encap_tbl_lock); + rtnl_unlock(); + + mlx5e_put_flow_list(priv, &flow_list); + mlx5e_route_put(priv, event_data->r); + dev_put(event_data->ul_dev); + kfree(event_data); +} + +static struct mlx5e_tc_fib_event_data * +mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, + struct net_device *ul_dev, + struct mlx5e_tc_tun_encap *encap, + unsigned long event, + struct fib_notifier_info *info) +{ + struct fib_entry_notifier_info *fen_info; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + struct net_device *fib_dev; + + fen_info = container_of(info, struct fib_entry_notifier_info, info); + if (fen_info->fi->nh) + return NULL; + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || + fen_info->dst_len != 32) + return NULL; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); + if (!fib_work) + return ERR_PTR(-ENOMEM); + + key.endpoint_ip.v4 = htonl(fen_info->dst); + key.ip_version = 4; + + /* Can't fail after this point because releasing reference to r + * requires obtaining sleeping mutex which we can't do in atomic + * context. + */ + r = mlx5e_route_lookup_for_update(encap, &key); + if (!r) + goto out; + fib_work->r = r; + dev_hold(ul_dev); + + return fib_work; + +out: + kfree(fib_work); + return NULL; +} + +static struct mlx5e_tc_fib_event_data * +mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv, + struct net_device *ul_dev, + struct mlx5e_tc_tun_encap *encap, + unsigned long event, + struct fib_notifier_info *info) +{ + struct fib6_entry_notifier_info *fen_info; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + struct net_device *fib_dev; + + fen_info = container_of(info, struct fib6_entry_notifier_info, info); + fib_dev = fib6_info_nh_dev(fen_info->rt); + if (fib_dev->netdev_ops != &mlx5e_netdev_ops || + fen_info->rt->fib6_dst.plen != 128) + return NULL; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); + if (!fib_work) + return ERR_PTR(-ENOMEM); + + memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr, + sizeof(fen_info->rt->fib6_dst.addr)); + key.ip_version = 6; + + /* Can't fail after this point because releasing reference to r + * requires obtaining sleeping mutex which we can't do in atomic + * context. + */ + r = mlx5e_route_lookup_for_update(encap, &key); + if (!r) + goto out; + fib_work->r = r; + dev_hold(ul_dev); + + return fib_work; + +out: + kfree(fib_work); + return NULL; +} + +static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct mlx5e_tc_fib_event_data *fib_work; + struct fib_notifier_info *info = ptr; + struct mlx5e_tc_tun_encap *encap; + struct net_device *ul_dev; + struct mlx5e_priv *priv; + + encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb); + priv = encap->priv; + ul_dev = priv->netdev; + priv = netdev_priv(ul_dev); + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + if (info->family == AF_INET) + fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info); + else if (info->family == AF_INET6) + fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info); + else + return NOTIFY_DONE; + + if (!IS_ERR_OR_NULL(fib_work)) { + queue_work(priv->wq, &fib_work->work); + } else if (IS_ERR(fib_work)) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work"); + mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n", + PTR_ERR(fib_work)); + } + + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_DONE; +} + +struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_tun_encap *encap; + int err; + + encap = kvzalloc(sizeof(*encap), GFP_KERNEL); + if (!encap) + return ERR_PTR(-ENOMEM); + + encap->priv = priv; + encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event; + spin_lock_init(&encap->route_lock); + hash_init(encap->route_tbl); + err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb, + NULL, NULL); + if (err) { + kvfree(encap); + return ERR_PTR(err); + } + + return encap; +} + +void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap) +{ + if (!encap) + return; + + unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb); + flush_workqueue(encap->priv->wq); /* flush fib event works */ + kvfree(encap); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h new file mode 100644 index 0000000000..5d7d67687c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_TUN_ENCAP_H__ +#define __MLX5_EN_TC_TUN_ENCAP_H__ + +#include "tc_priv.h" + +void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + int out_index); + +int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, + struct net_device **encap_dev); + +int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); +void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +int mlx5e_attach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); +void mlx5e_detach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *vf_tun); +void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info); + +int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec); + +struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv); +void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap); + +#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c new file mode 100644 index 0000000000..2bcd10b6d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include +#include "lib/geneve.h" +#include "en/tc_tun.h" + +#define MLX5E_GENEVE_VER 0 + +static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv) +{ + return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE); +} + +static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + + sizeof(struct genevehdr) + + e->tun_info->options_len; +} + +static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Currently we support only default GENEVE + * port, so udp dst port must match. + */ + if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a GENEVE port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a GENEVE port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_geneve(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &geneve_tunnel; + + /* Reformat type for GENEVE encap is similar to VXLAN: + * in both cases the HW adds in the same place a + * defined encapsulation header that the SW provides. + */ + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_info *tun_info = e->tun_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct genevehdr *geneveh; + + geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr)); + + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_info->key.tp_dst; + + memset(geneveh, 0, sizeof(*geneveh)); + geneveh->ver = MLX5E_GENEVE_VER; + geneveh->opt_len = tun_info->options_len / 4; + geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); + geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); + geneveh->proto_type = htons(ETH_P_TEB); + + if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (!geneveh->opt_len) + return -EOPNOTSUPP; + ip_tunnel_info_opts_get(geneveh->options, tun_info); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len); + u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c, *misc_v, *misc_3_c, *misc_3_v; + struct geneve_opt *option_key, *option_mask; + __be32 opt_data_key = 0, opt_data_mask = 0; + struct flow_match_enc_opts enc_opts; + int res = 0; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3); + misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + return 0; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_data)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options is not supported\n"); + return -EOPNOTSUPP; + } + + /* make sure that we're talking about GENEVE options */ + + if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: option type is not GENEVE"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: option type is not GENEVE\n"); + return -EOPNOTSUPP; + } + + if (enc_opts.mask->len && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_opt_len)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options len is not supported\n"); + return -EOPNOTSUPP; + } + + /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it + * doesn't include the TLV option header. 'geneve_opt_len' is a total + * len of all the options, including the headers, also multiples of 4 + * bytes. Len that comes from the dissector is in bytes. + */ + + if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported options len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported options len (len=%d)\n", + enc_opts.key->len); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4); + MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4); + + /* we support matching on one option only, so just get it */ + option_key = (struct geneve_opt *)&enc_opts.key->data[0]; + option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + + if (option_mask->opt_class == 0 && option_mask->type == 0 && + !memchr_inv(option_mask->opt_data, 0, option_mask->length * 4)) + return 0; + + if (option_key->length > max_tlv_option_data_len) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported option len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n", + option_key->length, option_mask->length); + return -EOPNOTSUPP; + } + + /* data can't be all 0 - fail to offload such rule */ + if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: can't match on 0 data field"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: can't match on 0 data field\n"); + return -EOPNOTSUPP; + } + + /* add new GENEVE TLV options object */ + res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key); + if (res) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: failed creating TLV opt object"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n", + be16_to_cpu(option_key->opt_class), + option_key->type, option_key->length); + return res; + } + + /* In general, after creating the object, need to query it + * in order to check which option data to set in misc3. + * But we support only geneve_tlv_option_0_data, so no + * point querying at this stage. + */ + + memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4); + memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4); + MLX5_SET(fte_match_set_misc3, misc_3_v, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); + MLX5_SET(fte_match_set_misc3, misc_3_c, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_exist)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist); + MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist); + } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct netlink_ext_ack *extack = f->common.extack; + + /* match on OAM - packets with OAM bit on should NOT be offloaded */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n"); + return -EOPNOTSUPP; + } + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam); + MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0); + + /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */ + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_protocol_type)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type); + MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB); + } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f); + if (err) + return err; + + err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f); + if (err) + return err; + + return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); +} + +static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_GENEVE_OPT); +} + +struct mlx5e_tc_tunnel geneve_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_geneve, + .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve, + .parse_tunnel = mlx5e_tc_tun_parse_geneve, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_geneve, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c new file mode 100644 index 0000000000..ada14f0574 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e) +{ + return gre_calc_hlen(e->tun_info->key.tun_flags); +} + +static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &gre_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + + *ip_proto = IPPROTO_GRE; + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE); + + /* gre protocol */ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + return 0; +} + +struct mlx5e_tc_tunnel gre_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP, + .match_level = MLX5_MATCH_L3, + .can_offload = mlx5e_tc_tun_can_offload_gretap, + .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, + .parse_udp_ports = NULL, + .parse_tunnel = mlx5e_tc_tun_parse_gretap, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c new file mode 100644 index 0000000000..c5b1617d55 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include +#include +#include "en/tc_tun.h" + +static bool can_offload(struct mlx5e_priv *priv) +{ + return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2); +} + +static int calc_hlen(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + MPLS_HLEN; +} + +static int init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &mplsoudp_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + return 0; +} + +static int generate_ip_tun_hdr(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *r) +{ + const struct ip_tunnel_key *tun_key = &r->tun_info->key; + const struct mlx5e_mpls_info *mpls_info = &r->mpls_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct mpls_shim_hdr *mpls; + + mpls = (struct mpls_shim_hdr *)(udp + 1); + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos); + + return 0; +} + +static int parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); +} + +static int parse_tunnel(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_mpls match; + void *misc2_c; + void *misc2_v; + + if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) && + !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP)) + return -EOPNOTSUPP; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return -EOPNOTSUPP; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) + return 0; + + flow_rule_match_mpls(rule, &match); + + /* Only support matching the first LSE */ + if (match.mask->used_lses != 1) + return -EOPNOTSUPP; + + misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_label, + match.mask->ls[0].mpls_label); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_label, + match.key->ls[0].mpls_label); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_exp, + match.mask->ls[0].mpls_tc); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_exp, match.key->ls[0].mpls_tc); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_s_bos, + match.mask->ls[0].mpls_bos); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_s_bos, + match.key->ls[0].mpls_bos); + + MLX5_SET(fte_match_set_misc2, misc2_c, + outer_first_mpls_over_udp.mpls_ttl, + match.mask->ls[0].mpls_ttl); + MLX5_SET(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp.mpls_ttl, + match.key->ls[0].mpls_ttl); + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + + return 0; +} + +struct mlx5e_tc_tunnel mplsoudp_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, + .match_level = MLX5_MATCH_L4, + .can_offload = can_offload, + .calc_hlen = calc_hlen, + .init_encap_attr = init_encap_attr, + .generate_ip_tun_hdr = generate_ip_tun_hdr, + .parse_udp_ports = parse_udp_ports, + .parse_tunnel = parse_tunnel, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c new file mode 100644 index 0000000000..a184d739d5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include +#include +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e) +{ + return VXLAN_HLEN; +} + +static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* check the UDP destination port validity */ + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, + be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err = 0; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); + + e->tunnel = &vxlan_tunnel; + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + const struct vxlan_metadata *md; + struct vxlanhdr *vxh; + + if ((tun_key->tun_flags & TUNNEL_VXLAN_OPT) && + e->tun_info->options_len != sizeof(*md)) + return -EOPNOTSUPP; + vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) { + md = ip_tunnel_info_opts(e->tun_info); + vxlan_build_gbp_hdr(vxh, md); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan_gbp_option(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_opts enc_opts; + void *misc5_c, *misc5_v; + u32 *gbp, *gbp_mask; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(priv->mdev, tunnel_header_0_1)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on VxLAN GBP is not supported"); + return -EOPNOTSUPP; + } + + if (enc_opts.key->dst_opt_type != TUNNEL_VXLAN_OPT) { + NL_SET_ERR_MSG_MOD(extack, "Wrong VxLAN option type: not GBP"); + return -EOPNOTSUPP; + } + + if (enc_opts.key->len != sizeof(*gbp) || + enc_opts.mask->len != sizeof(*gbp_mask)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN GBP option/mask len is not 32 bits"); + return -EINVAL; + } + + gbp = (u32 *)&enc_opts.key->data[0]; + gbp_mask = (u32 *)&enc_opts.mask->data[0]; + + if (*gbp_mask & ~VXLAN_GBP_MASK) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Wrong VxLAN GBP mask(0x%08X)\n", *gbp_mask); + return -EINVAL; + } + + misc5_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_5); + misc5_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_5); + MLX5_SET(fte_match_set_misc5, misc5_c, tunnel_header_0, *gbp_mask); + MLX5_SET(fte_match_set_misc5, misc5_v, tunnel_header_0, *gbp); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + int err; + + err = mlx5e_tc_tun_parse_vxlan_gbp_option(priv, spec, f); + if (err) + return err; + } + + /* match on VNI is required */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_vxlan_vni)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on VXLAN VNI is not supported"); + netdev_warn(priv->netdev, + "Matching on VXLAN VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + return 0; +} + +static bool mlx5e_tc_tun_encap_info_equal_vxlan(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_VXLAN_OPT); +} + +static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev) +{ + const struct vxlan_dev *vxlan = netdev_priv(mirred_dev); + const struct vxlan_rdst *dst = &vxlan->default_dst; + + return dst->remote_ifindex; +} + +struct mlx5e_tc_tunnel vxlan_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_vxlan, + .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, + .parse_tunnel = mlx5e_tc_tun_parse_vxlan, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_vxlan, + .get_remote_ifindex = mlx5e_tc_tun_get_remote_ifindex, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c new file mode 100644 index 0000000000..d4239e3b3c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "tir.h" +#include "params.h" +#include + +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) + +/* max() doesn't work inside square brackets. */ +#define MLX5E_TIR_CMD_IN_SZ_DW ( \ + MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? \ + MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \ +) + +struct mlx5e_tir_builder { + u32 in[MLX5E_TIR_CMD_IN_SZ_DW]; + bool modify; +}; + +struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify) +{ + struct mlx5e_tir_builder *builder; + + builder = kvzalloc(sizeof(*builder), GFP_KERNEL); + builder->modify = modify; + + return builder; +} + +void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder) +{ + kvfree(builder); +} + +void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder) +{ + memset(builder->in, 0, sizeof(builder->in)); +} + +static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder) +{ + if (builder->modify) + return MLX5_ADDR_OF(modify_tir_in, builder->in, ctx); + return MLX5_ADDR_OF(create_tir_in, builder->in, ctx); +} + +void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, transport_domain, tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE); + MLX5_SET(tirc, tirc, inline_rqn, rqn); +} + +void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, + u32 rqtn, bool inner_ft_support) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, transport_domain, tdn); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support); +} + +void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder, + const struct mlx5e_packet_merge_param *pkt_merge_param) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + const unsigned int rough_max_l2_l3_hdr_sz = 256; + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1); + + switch (pkt_merge_param->type) { + case MLX5E_PACKET_MERGE_LRO: + MLX5_SET(tirc, tirc, packet_merge_mask, + MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO | + MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout); + break; + default: + break; + } +} + +static int mlx5e_hfunc_to_hw(u8 hfunc) +{ + switch (hfunc) { + case ETH_RSS_HASH_TOP: + return MLX5_RX_HASH_FN_TOEPLITZ; + case ETH_RSS_HASH_XOR: + return MLX5_RX_HASH_FN_INVERTED_XOR8; + default: + return MLX5_RX_HASH_FN_NONE; + } +} + +void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, + const struct mlx5e_rss_params_hash *rss_hash, + const struct mlx5e_rss_params_traffic_type *rss_tt, + bool inner) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + void *hfso; + + if (builder->modify) + MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1); + + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc)); + if (rss_hash->hfunc == ETH_RSS_HASH_TOP) { + const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); + void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, rss_hash->toeplitz_hash_key, len); + } + + if (inner) + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); + else + hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, rss_tt->l3_prot_type); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, rss_tt->l4_prot_type); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, rss_tt->rx_hash_fields); +} + +void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); +} + +void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder) +{ + void *tirc = mlx5e_tir_builder_get_tirc(builder); + + WARN_ON(builder->modify); + + MLX5_SET(tirc, tirc, tls_en, 1); + MLX5_SET(tirc, tirc, self_lb_block, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST | + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST); +} + +int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, + struct mlx5_core_dev *mdev, bool reg) +{ + int err; + + tir->mdev = mdev; + + err = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn); + if (err) + return err; + + if (reg) { + struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs; + + mutex_lock(&res->td.list_lock); + list_add(&tir->list, &res->td.tirs_list); + mutex_unlock(&res->td.list_lock); + } else { + INIT_LIST_HEAD(&tir->list); + } + + return 0; +} + +void mlx5e_tir_destroy(struct mlx5e_tir *tir) +{ + struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs; + + /* Skip mutex if list_del is no-op (the TIR wasn't registered in the + * list). list_empty will never return true for an item of tirs_list, + * and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee consistency + * of the list->next value. + */ + if (!list_empty(&tir->list)) { + mutex_lock(&res->td.list_lock); + list_del(&tir->list); + mutex_unlock(&res->td.list_lock); + } + + mlx5_core_destroy_tir(tir->mdev, tir->tirn); +} + +int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder) +{ + return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h new file mode 100644 index 0000000000..857a84bcd5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_TIR_H__ +#define __MLX5_EN_TIR_H__ + +#include + +struct mlx5e_rss_params_hash { + u8 hfunc; + u8 toeplitz_hash_key[40]; +}; + +struct mlx5e_rss_params_traffic_type { + u8 l3_prot_type; + u8 l4_prot_type; + u32 rx_hash_fields; +}; + +struct mlx5e_tir_builder; +struct mlx5e_packet_merge_param; + +struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify); +void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder); + +void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn); +void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, + u32 rqtn, bool inner_ft_support); +void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder, + const struct mlx5e_packet_merge_param *pkt_merge_param); +void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, + const struct mlx5e_rss_params_hash *rss_hash, + const struct mlx5e_rss_params_traffic_type *rss_tt, + bool inner); +void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder); +void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder); + +struct mlx5_core_dev; + +struct mlx5e_tir { + struct mlx5_core_dev *mdev; + u32 tirn; + struct list_head list; +}; + +int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder, + struct mlx5_core_dev *mdev, bool reg); +void mlx5e_tir_destroy(struct mlx5e_tir *tir); + +static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir) +{ + return tir->tirn; +} + +int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder); + +#endif /* __MLX5_EN_TIR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c new file mode 100644 index 0000000000..5620d9f975 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies */ + +#include "en/txrx.h" +#include "en/params.h" +#include "en/trap.h" + +static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_trap *trap_ctx = container_of(napi, struct mlx5e_trap, napi); + struct mlx5e_ch_stats *ch_stats = trap_ctx->stats; + struct mlx5e_rq *rq = &trap_ctx->rq; + bool busy = false; + int work_done = 0; + + rcu_read_lock(); + + ch_stats->poll++; + + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + busy |= work_done == budget; + busy |= rq->post_wqes(rq); + + if (busy) { + work_done = budget; + goto out; + } + + if (unlikely(!napi_complete_done(napi, work_done))) + goto out; + + mlx5e_cq_arm(&rq->cq); + +out: + rcu_read_unlock(); + return work_done; +} + +static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = t->mdev; + struct mlx5e_priv *priv = t->priv; + + rq->wq_type = params->rq_wq_type; + rq->pdev = t->pdev; + rq->netdev = priv->netdev; + rq->priv = priv; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &priv->trap_stats.rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + xdp_rxq_info_unused(&rq->xdp_rxq); + mlx5e_rq_set_trap_handlers(rq, params); +} + +static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t) +{ + struct mlx5e_rq_param *rq_param = &t->rq_param; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder trap_moder = {}; + struct mlx5e_rq *rq = &t->rq; + int node; + int err; + + node = dev_to_node(mdev->device); + + ccp.node = node; + ccp.ch_stats = t->stats; + ccp.napi = &t->napi; + ccp.ix = 0; + err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, &rq->cq); + if (err) + return err; + + mlx5e_init_trap_rq(t, &t->params, rq); + err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq); + if (err) + goto err_destroy_cq; + + return 0; + +err_destroy_cq: + mlx5e_close_cq(&rq->cq); + + return err; +} + +static void mlx5e_close_trap_rq(struct mlx5e_rq *rq) +{ + mlx5e_close_rq(rq); + mlx5e_close_cq(&rq->cq); +} + +static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, + u32 rqn) +{ + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn); + err = mlx5e_tir_init(tir, builder, mdev, true); + + mlx5e_tir_builder_free(builder); + + return err; +} + +static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, + int max_mtu, u16 q_counter, + struct mlx5e_trap *t) +{ + struct mlx5e_params *params = &t->params; + + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; + mlx5e_init_rq_type_params(mdev, params); + params->sw_mtu = max_mtu; + mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param); +} + +static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) +{ + int cpu = mlx5_comp_vector_get_cpu(priv->mdev, 0); + struct net_device *netdev = priv->netdev; + struct mlx5e_trap *t; + int err; + + t = kvzalloc_node(sizeof(*t), GFP_KERNEL, cpu_to_node(cpu)); + if (!t) + return ERR_PTR(-ENOMEM); + + mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t); + + t->priv = priv; + t->mdev = priv->mdev; + t->tstamp = &priv->tstamp; + t->pdev = mlx5_core_dma_dev(priv->mdev); + t->netdev = priv->netdev; + t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); + t->stats = &priv->trap_stats.ch; + + netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll); + + err = mlx5e_open_trap_rq(priv, t); + if (unlikely(err)) + goto err_napi_del; + + err = mlx5e_create_trap_direct_rq_tir(t->mdev, &t->tir, t->rq.rqn); + if (err) + goto err_close_trap_rq; + + return t; + +err_close_trap_rq: + mlx5e_close_trap_rq(&t->rq); +err_napi_del: + netif_napi_del(&t->napi); + kvfree(t); + return ERR_PTR(err); +} + +void mlx5e_close_trap(struct mlx5e_trap *trap) +{ + mlx5e_tir_destroy(&trap->tir); + mlx5e_close_trap_rq(&trap->rq); + netif_napi_del(&trap->napi); + kvfree(trap); +} + +static void mlx5e_activate_trap(struct mlx5e_trap *trap) +{ + napi_enable(&trap->napi); + mlx5e_activate_rq(&trap->rq); + mlx5e_trigger_napi_sched(&trap->napi); +} + +void mlx5e_deactivate_trap(struct mlx5e_priv *priv) +{ + struct mlx5e_trap *trap = priv->en_trap; + + mlx5e_deactivate_rq(&trap->rq); + napi_disable(&trap->napi); +} + +static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv) +{ + struct mlx5e_trap *trap; + + trap = mlx5e_open_trap(priv); + if (IS_ERR(trap)) + goto out; + + mlx5e_activate_trap(trap); +out: + return trap; +} + +static void mlx5e_del_trap_queue(struct mlx5e_priv *priv) +{ + mlx5e_deactivate_trap(priv); + mlx5e_close_trap(priv->en_trap); + priv->en_trap = NULL; +} + +static int mlx5e_trap_get_tirn(struct mlx5e_trap *en_trap) +{ + return en_trap->tir.tirn; +} + +static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id) +{ + bool open_queue = !priv->en_trap; + struct mlx5e_trap *trap; + int err; + + if (open_queue) { + trap = mlx5e_add_trap_queue(priv); + if (IS_ERR(trap)) + return PTR_ERR(trap); + priv->en_trap = trap; + } + + switch (trap_id) { + case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: + err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); + if (err) + goto err_out; + break; + case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER: + err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); + if (err) + goto err_out; + break; + default: + netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); + err = -EINVAL; + goto err_out; + } + return 0; + +err_out: + if (open_queue) + mlx5e_del_trap_queue(priv); + return err; +} + +static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id) +{ + switch (trap_id) { + case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: + mlx5e_remove_vlan_trap(priv->fs); + break; + case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER: + mlx5e_remove_mac_trap(priv->fs); + break; + default: + netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); + return -EINVAL; + } + if (priv->en_trap && !mlx5_devlink_trap_get_num_active(priv->mdev)) + mlx5e_del_trap_queue(priv); + + return 0; +} + +int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx) +{ + int err = 0; + + /* Traps are unarmed when interface is down, no need to update + * them. The configuration is saved in the core driver, + * queried and applied upon interface up operation in + * mlx5e_open_locked(). + */ + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + switch (trap_ctx->action) { + case DEVLINK_TRAP_ACTION_TRAP: + err = mlx5e_handle_action_trap(priv, trap_ctx->id); + break; + case DEVLINK_TRAP_ACTION_DROP: + err = mlx5e_handle_action_drop(priv, trap_ctx->id); + break; + default: + netdev_warn(priv->netdev, "%s: Unsupported action %d\n", __func__, + trap_ctx->action); + err = -EINVAL; + } + return err; +} + +static int mlx5e_apply_trap(struct mlx5e_priv *priv, int trap_id, bool enable) +{ + enum devlink_trap_action action; + int err; + + err = mlx5_devlink_traps_get_action(priv->mdev, trap_id, &action); + if (err) + return err; + if (action == DEVLINK_TRAP_ACTION_TRAP) + err = enable ? mlx5e_handle_action_trap(priv, trap_id) : + mlx5e_handle_action_drop(priv, trap_id); + return err; +} + +static const int mlx5e_traps_arr[] = { + DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER, + DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER, +}; + +int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable) +{ + int err; + int i; + + for (i = 0; i < ARRAY_SIZE(mlx5e_traps_arr); i++) { + err = mlx5e_apply_trap(priv, mlx5e_traps_arr[i], enable); + if (err) + return err; + } + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h new file mode 100644 index 0000000000..aa3f17658c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies */ + +#ifndef __MLX5E_TRAP_H__ +#define __MLX5E_TRAP_H__ + +#include "../en.h" +#include "../devlink.h" + +struct mlx5e_trap { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_tir tir; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + + /* data path - accessed per napi poll */ + struct mlx5e_ch_stats *stats; + + /* control */ + struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); + + struct mlx5e_params params; + struct mlx5e_rq_param rq_param; +}; + +void mlx5e_close_trap(struct mlx5e_trap *trap); +void mlx5e_deactivate_trap(struct mlx5e_priv *priv); +int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx); +int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h new file mode 100644 index 0000000000..879d698b61 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -0,0 +1,509 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TXRX_H___ +#define __MLX5_EN_TXRX_H___ + +#include "en.h" +#include + +#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) + +/* IPSEC inline data includes: + * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for + * next header. + * 2. ESP authentication data: 16 bytes for ICV. + */ +#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \ + 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS) + +/* 366 should be big enough to cover all L2, L3 and L4 headers with possible + * encapsulations. + */ +#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \ + MLX5_SEND_WQE_DS) + +/* Sync the calculation with mlx5e_sq_calc_wqe_attr. */ +#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \ + MLX5E_MAX_TX_INLINE_DS + \ + MLX5E_MAX_TX_IPSEC_DS + \ + MAX_SKB_FRAGS + 1, \ + MLX5_SEND_WQEBB_NUM_DS) + +#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) + +static inline +ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts) +{ + return INDIRECT_CALL_2(func, mlx5_real_time_cyc2time, mlx5_timecounter_cyc2time, + clock, cqe_ts); +} + +enum mlx5e_icosq_wqe_type { + MLX5E_ICOSQ_WQE_NOP, + MLX5E_ICOSQ_WQE_UMR_RX, + MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR, +#ifdef CONFIG_MLX5_EN_TLS + MLX5E_ICOSQ_WQE_UMR_TLS, + MLX5E_ICOSQ_WQE_SET_PSV_TLS, + MLX5E_ICOSQ_WQE_GET_PSV_TLS, +#endif +}; + +/* General */ +static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb) +{ + return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST; +} + +void mlx5e_trigger_irq(struct mlx5e_icosq *sq); +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe); +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); +int mlx5e_napi_poll(struct napi_struct *napi, int budget); +int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); + +/* RX */ +INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); +INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +void mlx5e_free_rx_descs(struct mlx5e_rq *rq); +void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq); + +static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) +{ + return config->rx_filter == HWTSTAMP_FILTER_ALL; +} + +/* TX */ +struct mlx5e_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len : 31; + u32 has_frags : 1; +}; + +struct mlx5e_xmit_data_frags { + struct mlx5e_xmit_data xd; + struct skb_shared_info *sinfo; + dma_addr_t *dma_arr; +}; + +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); + +static inline bool +mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo) +{ + return (u16)(*fifo->pc - *fifo->cc) <= fifo->mask; +} + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); +} + +static inline void *mlx5e_fetch_wqe(struct mlx5_wq_cyc *wq, u16 pi, size_t wqe_size) +{ + void *wqe; + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memset(wqe, 0, wqe_size); + + return wqe; +} + +#define MLX5E_TX_FETCH_WQE(sq, pi) \ + ((struct mlx5e_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_tx_wqe))) + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + + (*pc)++; + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + (*pc)++; + + return wqe; +} + +struct mlx5e_tx_wqe_info { + struct sk_buff *skb; + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; + u8 num_fifo_pkts; +#ifdef CONFIG_MLX5_EN_TLS + struct page *resync_dump_frag_page; +#endif +}; + +static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs < size)) { + struct mlx5e_tx_wqe_info *wi, *edge_wi; + + wi = &sq->db.wqe_info[pi]; + edge_wi = wi + contig_wqebbs; + + /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */ + for (; wi < edge_wi; wi++) { + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nop += contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + + return pi; +} + +void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq); + +static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1); +} + +struct mlx5e_shampo_umr { + u16 len; +}; + +struct mlx5e_icosq_wqe_info { + u8 wqe_type; + u8 num_wqebbs; + + /* Auxiliary data for different wqe types. */ + union { + struct { + struct mlx5e_rq *rq; + } umr; + struct mlx5e_shampo_umr shampo; +#ifdef CONFIG_MLX5_EN_TLS + struct { + struct mlx5e_ktls_offload_context_rx *priv_rx; + } tls_set_params; + struct { + struct mlx5e_ktls_rx_resync_buf *buf; + } tls_get_params; +#endif + }; +}; + +void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq); + +static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs < size)) { + struct mlx5e_icosq_wqe_info *wi, *edge_wi; + + wi = &sq->db.wqe_info[pi]; + edge_wi = wi + contig_wqebbs; + + /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */ + for (; wi < edge_wi; wi++) { + *wi = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_NOP, + .num_wqebbs = 1, + }; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + + return pi; +} + +static inline void +mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *wq->db = cpu_to_be32(pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, uar_map); +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); +} + +static inline struct mlx5e_sq_dma * +mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) +{ + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; +} + +static inline void +mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, + enum mlx5e_dma_map_type map_type) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + dma->addr = addr; + dma->size = size; + dma->type = map_type; +} + +static inline +struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_skb_fifo *fifo, u16 i) +{ + return &fifo->fifo[i & fifo->mask]; +} + +static inline +void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb) +{ + struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++); + + *skb_item = skb; +} + +static inline +struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo) +{ + WARN_ON_ONCE(*fifo->pc == *fifo->cc); + + return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++); +} + +static inline void +mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) +{ + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); + } +} + +void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); + +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) +{ + return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; +} + +static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) +{ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + mlx5_wq_ll_reset(&rq->mpwqe.wq); + rq->mpwqe.actual_wq_head = 0; + } else { + mlx5_wq_cyc_reset(&rq->wqe.wq); + } +} + +static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 qn, + struct mlx5_err_cqe *err_cqe) +{ + struct mlx5_cqwq *wq = &cq->wq; + u32 ci; + + ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1); + + netdev_err(cq->netdev, + "Error cqe on cqn 0x%x, ci 0x%x, qn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + cq->mcq.cqn, ci, qn, + get_cqe_opcode((struct mlx5_cqe64 *)err_cqe), + err_cqe->syndrome, err_cqe->vendor_err_synd); + mlx5_dump_err_cqe(cq->mdev, err_cqe); +} + +static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_size(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_size(&rq->wqe.wq); + } +} + +static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return rq->mpwqe.wq.cur_sz; + default: + return rq->wqe.wq.cur_sz; + } +} + +static inline u16 mlx5e_rqwq_get_head(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_head(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_head(&rq->wqe.wq); + } +} + +static inline u16 mlx5e_rqwq_get_wqe_counter(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_counter(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_counter(&rq->wqe.wq); + } +} + +/* SW parser related functions */ + +struct mlx5e_swp_spec { + __be16 l3_proto; + u8 l4_proto; + u8 is_tun; + __be16 tun_l3_proto; + u8 tun_l4_proto; +}; + +static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset += VLAN_HLEN / 2; + eseg->swp_outer_l4_offset += VLAN_HLEN / 2; + eseg->swp_inner_l3_offset += VLAN_HLEN / 2; + eseg->swp_inner_l4_offset += VLAN_HLEN / 2; +} + +static inline void +mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, + struct mlx5e_swp_spec *swp_spec) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + if (swp_spec->l4_proto) { + eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; + if (swp_spec->l4_proto == IPPROTO_UDP) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + } + + if (swp_spec->is_tun) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } + switch (swp_spec->tun_l4_proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1) + +static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) +{ + WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < (u16)mlx5e_get_max_sq_wqebbs(mdev)); + + /* A WQE must not cross the page boundary, hence two conditions: + * 1. Its size must not exceed the page size. + * 2. If the WQE size is X, and the space remaining in a page is less + * than X, this space needs to be padded with NOPs. So, one WQE of + * size X may require up to X-1 WQEBBs of padding, which makes the + * stop room of X-1 + X. + * WQE size is also limited by the hardware limit. + */ + WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev), + "wqe_size %u is greater than max SQ WQEBBs %u", + wqe_size, mlx5e_get_max_sq_wqebbs(mdev)); + + return MLX5E_STOP_ROOM(wqe_size); +} + +static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev) +{ + return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); +} + +static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev) +{ + u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + + return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs); +} + +static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) +{ + u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size); + + return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room); +} + +static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) +{ + size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe); + + return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c new file mode 100644 index 0000000000..b723ff5e52 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -0,0 +1,909 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "en/xdp.h" +#include "en/params.h" +#include +#include + +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) +{ + int hr = mlx5e_get_linear_rq_headroom(params, xsk); + + /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). + * The condition checked in mlx5e_rx_is_linear_skb is: + * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) + * (Note that hw_mtu == sw_mtu + hard_mtu.) + * What is returned from this function is: + * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) + * After assigning sw_mtu := max_mtu, the left side of (1) turns to + * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, + * because both PAGE_SIZE and S are already aligned. Any number greater + * than max_mtu would make the left side of (1) greater than PAGE_SIZE, + * so max_mtu is the maximum MTU allowed. + */ + + return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); +} + +static inline bool +mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, + struct xdp_buff *xdp) +{ + struct page *page = virt_to_page(xdp->data); + struct mlx5e_xmit_data_frags xdptxdf = {}; + struct mlx5e_xmit_data *xdptxd; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + int i; + + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) + return false; + + xdptxd = &xdptxdf.xd; + xdptxd->data = xdpf->data; + xdptxd->len = xdpf->len; + xdptxd->has_frags = xdp_frame_has_frags(xdpf); + + if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) { + /* The xdp_buff was in the UMEM and was copied into a newly + * allocated page. The UMEM page was returned via the ZCA, and + * this new page has to be mapped at this point and has to be + * unmapped and returned via xdp_return_frame on completion. + */ + + /* Prevent double recycling of the UMEM page. Even in case this + * function returns false, the xdp_buff shouldn't be recycled, + * as it was already done in xdp_convert_zc_to_xdp_frame. + */ + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + + if (unlikely(xdptxd->has_frags)) + return false; + + dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len, + DMA_TO_DEVICE); + if (dma_mapping_error(sq->pdev, dma_addr)) { + xdp_return_frame(xdpf); + return false; + } + + xdptxd->dma_addr = dma_addr; + + if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, xdptxd, 0))) + return false; + + /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.xdpf = xdpf }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.dma_addr = dma_addr }); + return true; + } + + /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame + * that points to the same memory region as the original xdp_buff. It + * allows to map the memory only once and to use the DMA_BIDIRECTIONAL + * mode. + */ + + dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL); + + if (xdptxd->has_frags) { + xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf); + xdptxdf.dma_arr = NULL; + + for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[i]; + dma_addr_t addr; + u32 len; + + addr = page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + len = skb_frag_size(frag); + dma_sync_single_for_device(sq->pdev, addr, len, + DMA_BIDIRECTIONAL); + } + } + + xdptxd->dma_addr = dma_addr; + + if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, xdptxd, 0))) + return false; + + /* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE }); + + if (xdptxd->has_frags) { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .page.num = 1 + xdptxdf.sinfo->nr_frags }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.page = page }); + for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[i]; + + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .page.page = skb_frag_page(frag) }); + } + } else { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.num = 1 }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .page.page = page }); + } + + return true; +} + +static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) +{ + const struct mlx5e_xdp_buff *_ctx = (void *)ctx; + + if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp))) + return -ENODATA; + + *timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time, + _ctx->rq->clock, get_cqe_ts(_ctx->cqe)); + return 0; +} + +/* Mapping HW RSS Type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4-bits*/ +#define RSS_TYPE_MAX_TABLE 16 /* 4-bits max 16 entries */ +#define RSS_L4 GENMASK(1, 0) +#define RSS_L3 GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */ + +/* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 sorted numerical */ +enum mlx5_rss_hash_type { + RSS_TYPE_NO_HASH = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)), + RSS_TYPE_L3_IPV4 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)), + RSS_TYPE_L4_IPV4_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)), + RSS_TYPE_L4_IPV4_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)), + RSS_TYPE_L4_IPV4_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)), + RSS_TYPE_L3_IPV6 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)), + RSS_TYPE_L4_IPV6_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)), + RSS_TYPE_L4_IPV6_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)), + RSS_TYPE_L4_IPV6_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) | + FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)), +}; + +/* Invalid combinations will simply return zero, allows no boundary checks */ +static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = { + [RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_NONE, + [1] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [2] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [3] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [RSS_TYPE_L3_IPV4] = XDP_RSS_TYPE_L3_IPV4, + [RSS_TYPE_L4_IPV4_TCP] = XDP_RSS_TYPE_L4_IPV4_TCP, + [RSS_TYPE_L4_IPV4_UDP] = XDP_RSS_TYPE_L4_IPV4_UDP, + [RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC, + [RSS_TYPE_L3_IPV6] = XDP_RSS_TYPE_L3_IPV6, + [RSS_TYPE_L4_IPV6_TCP] = XDP_RSS_TYPE_L4_IPV6_TCP, + [RSS_TYPE_L4_IPV6_UDP] = XDP_RSS_TYPE_L4_IPV6_UDP, + [RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC, + [12] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [13] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [14] = XDP_RSS_TYPE_NONE, /* Implicit zero */ + [15] = XDP_RSS_TYPE_NONE, /* Implicit zero */ +}; + +static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + const struct mlx5e_xdp_buff *_ctx = (void *)ctx; + const struct mlx5_cqe64 *cqe = _ctx->cqe; + u32 hash_type, l4_type, ip_type, lookup; + + if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))) + return -ENODATA; + + *hash = be32_to_cpu(cqe->rss_hash_result); + + hash_type = cqe->rss_hash_type; + BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */ + ip_type = hash_type & CQE_RSS_HTYPE_IP; + l4_type = FIELD_GET(CQE_RSS_HTYPE_L4, hash_type); + lookup = ip_type | l4_type; + *rss_type = mlx5_xdp_rss_type[lookup]; + + return 0; +} + +const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = { + .xmo_rx_timestamp = mlx5e_xdp_rx_timestamp, + .xmo_rx_hash = mlx5e_xdp_rx_hash, +}; + +/* returns true if packet was consumed by xdp */ +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf) +{ + struct xdp_buff *xdp = &mxbuf->xdp; + u32 act; + int err; + + act = bpf_prog_run_xdp(prog, xdp); + switch (act) { + case XDP_PASS: + return false; + case XDP_TX: + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, xdp))) + goto xdp_abort; + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + return true; + case XDP_REDIRECT: + /* When XDP enabled then page-refcnt==1 here */ + err = xdp_do_redirect(rq->netdev, xdp, prog); + if (unlikely(err)) + goto xdp_abort; + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); + __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); + rq->stats->xdp_redirect++; + return true; + default: + bpf_warn_invalid_xdp_action(rq->netdev, prog, act); + fallthrough; + case XDP_ABORTED: +xdp_abort: + trace_xdp_exception(rq->netdev, prog, act); + fallthrough; + case XDP_DROP: + rq->stats->xdp_drop++; + return true; + } +} + +static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs < size)) { + struct mlx5e_xdp_wqe_info *wi, *edge_wi; + + wi = &sq->db.wqe_info[pi]; + edge_wi = wi + contig_wqebbs; + + /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */ + for (; wi < edge_wi; wi++) { + *wi = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = 1, + .num_pkts = 0, + }; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nops += contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + + return pi; +} + +static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5e_xdpsq_stats *stats = sq->stats; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + net_prefetchw(wqe->data); + + *session = (struct mlx5e_tx_mpwqe) { + .wqe = wqe, + .bytes_count = 0, + .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .pkt_count = 0, + .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on), + }; + + stats->mpwqe++; +} + +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl; + u16 ds_count = session->ds_count; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS); + wi->num_pkts = session->pkt_count; + + sq->pc += wi->num_wqebbs; + + sq->doorbell_cseg = cseg; + + session->wqe = NULL; /* Close session */ +} + +enum { + MLX5E_XDP_CHECK_OK = 1, + MLX5E_XDP_CHECK_START_MPWQE = 2, +}; + +INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!sq->mpwqe.wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + sq->stop_room))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_START_MPWQE; + } + + return MLX5E_XDP_CHECK_OK; +} + +INDIRECT_CALLABLE_SCOPE bool +mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, + int check_result); + +INDIRECT_CALLABLE_SCOPE bool +mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, + int check_result) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5e_xdpsq_stats *stats = sq->stats; + struct mlx5e_xmit_data *p = xdptxd; + struct mlx5e_xmit_data tmp; + + if (xdptxd->has_frags) { + struct mlx5e_xmit_data_frags *xdptxdf = + container_of(xdptxd, struct mlx5e_xmit_data_frags, xd); + + if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) { + /* MPWQE is enabled, but a multi-buffer packet is queued for + * transmission. MPWQE can't send fragmented packets, so close + * the current session and fall back to a regular WQE. + */ + if (unlikely(sq->mpwqe.wqe)) + mlx5e_xdp_mpwqe_complete(sq); + return mlx5e_xmit_xdp_frame(sq, xdptxd, 0); + } + if (!xdptxd->len) { + skb_frag_t *frag = &xdptxdf->sinfo->frags[0]; + + tmp.data = skb_frag_address(frag); + tmp.len = skb_frag_size(frag); + tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] : + page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + p = &tmp; + } + } + + if (unlikely(p->len > sq->hw_mtu)) { + stats->err++; + return false; + } + + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq); + if (unlikely(check_result < 0)) + return false; + + if (check_result == MLX5E_XDP_CHECK_START_MPWQE) { + /* Start the session when nothing can fail, so it's guaranteed + * that if there is an active session, it has at least one dseg, + * and it's safe to complete it at any time. + */ + mlx5e_xdp_mpwqe_session_start(sq); + } + + mlx5e_xdp_mpwqe_add_dseg(sq, p, stats); + + if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs))) + mlx5e_xdp_mpwqe_complete(sq); + + stats->xmit++; + return true; +} + +static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_OK; +} + +INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +{ + return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1); +} + +INDIRECT_CALLABLE_SCOPE bool +mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, + int check_result) +{ + struct mlx5e_xmit_data_frags *xdptxdf = + container_of(xdptxd, struct mlx5e_xmit_data_frags, xd); + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5e_tx_wqe *wqe; + + dma_addr_t dma_addr = xdptxd->dma_addr; + u32 dma_len = xdptxd->len; + u16 ds_cnt, inline_hdr_sz; + unsigned int frags_size; + u8 num_wqebbs = 1; + int num_frags = 0; + bool inline_ok; + bool linear; + u16 pi; + + struct mlx5e_xdpsq_stats *stats = sq->stats; + + inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE || + dma_len >= MLX5E_XDP_MIN_INLINE; + frags_size = xdptxd->has_frags ? xdptxdf->sinfo->xdp_frags_size : 0; + + if (unlikely(!inline_ok || sq->hw_mtu < dma_len + frags_size)) { + stats->err++; + return false; + } + + inline_hdr_sz = 0; + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + + linear = !!(dma_len - inline_hdr_sz); + ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz; + + /* check_result must be 0 if sinfo is passed. */ + if (!check_result) { + int stop_room = 1; + + if (xdptxd->has_frags) { + ds_cnt += xdptxdf->sinfo->nr_frags; + num_frags = xdptxdf->sinfo->nr_frags; + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big + * enough to hold all fragments. + */ + stop_room = MLX5E_STOP_ROOM(num_wqebbs); + } + + check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room); + } + if (unlikely(check_result < 0)) + return false; + + pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs); + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + net_prefetchw(wqe); + + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + /* copy the inline part if required */ + if (inline_hdr_sz) { + memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start)); + memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start), + inline_hdr_sz - sizeof(eseg->inline_hdr.start)); + dma_len -= inline_hdr_sz; + dma_addr += inline_hdr_sz; + dseg++; + } + + /* write the dma part */ + if (linear) { + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + dseg++; + } + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + + if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { + int i; + + memset(&cseg->trailer, 0, sizeof(cseg->trailer)); + memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); + + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; + dma_addr_t addr; + + addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : + page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); + + dseg->addr = cpu_to_be64(addr); + dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); + dseg->lkey = sq->mkey_be; + dseg++; + } + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + + sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_pkts = 1, + }; + + sq->pc += num_wqebbs; + } else { + cseg->fm_ce_se = 0; + + sq->pc++; + } + + sq->doorbell_cseg = cseg; + + stats->xmit++; + return true; +} + +static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_wqe_info *wi, + u32 *xsk_frames, + struct xdp_frame_bulk *bq) +{ + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + u16 i; + + for (i = 0; i < wi->num_pkts; i++) { + union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + + switch (xdpi.mode) { + case MLX5E_XDP_XMIT_MODE_FRAME: { + /* XDP_TX from the XSK RQ and XDP_REDIRECT */ + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + xdpf = xdpi.frame.xdpf; + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + dma_addr = xdpi.frame.dma_addr; + + dma_unmap_single(sq->pdev, dma_addr, + xdpf->len, DMA_TO_DEVICE); + if (xdp_frame_has_frags(xdpf)) { + struct skb_shared_info *sinfo; + int j; + + sinfo = xdp_get_shared_info_from_frame(xdpf); + for (j = 0; j < sinfo->nr_frags; j++) { + skb_frag_t *frag = &sinfo->frags[j]; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + dma_addr = xdpi.frame.dma_addr; + + dma_unmap_single(sq->pdev, dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + } + xdp_return_frame_bulk(xdpf, bq); + break; + } + case MLX5E_XDP_XMIT_MODE_PAGE: { + /* XDP_TX from the regular RQ */ + u8 num, n = 0; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + num = xdpi.page.num; + + do { + struct page *page; + + xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); + page = xdpi.page.page; + + /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) + * as we know this is a page_pool page. + */ + page_pool_recycle_direct(page->pp, page); + } while (++n < num); + + break; + } + case MLX5E_XDP_XMIT_MODE_XSK: + /* AF_XDP send */ + (*xsk_frames)++; + break; + default: + WARN_ON_ONCE(true); + } + } +} + +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +{ + struct xdp_frame_bulk bq; + struct mlx5e_xdpsq *sq; + struct mlx5_cqe64 *cqe; + u32 xsk_frames = 0; + u16 sqcc; + int i; + + xdp_frame_bulk_init(&bq); + + sq = container_of(cq, struct mlx5e_xdpsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return false; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + i = 0; + do { + struct mlx5e_xdp_wqe_info *wi; + u16 wqe_counter, ci; + bool last_wqe; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + last_wqe = (sqcc == wqe_counter); + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq); + } while (!last_wqe); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + netdev_WARN_ONCE(sq->channel->netdev, + "Bad OP in XDPSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + mlx5e_dump_error_cqe(&sq->cq, sq->sqn, + (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); + } + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + xdp_flush_frame_bulk(&bq); + + if (xsk_frames) + xsk_tx_completed(sq->xsk_pool, xsk_frames); + + sq->stats->cqes += i; + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +{ + struct xdp_frame_bulk bq; + u32 xsk_frames = 0; + + xdp_frame_bulk_init(&bq); + + rcu_read_lock(); /* need for xdp_return_frame_bulk */ + + while (sq->cc != sq->pc) { + struct mlx5e_xdp_wqe_info *wi; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + wi = &sq->db.wqe_info[ci]; + + sq->cc += wi->num_wqebbs; + + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq); + } + + xdp_flush_frame_bulk(&bq); + rcu_read_unlock(); + + if (xsk_frames) + xsk_tx_completed(sq->xsk_pool, xsk_frames); +} + +int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_xdpsq *sq; + int nxmit = 0; + int sq_num; + int i; + + /* this flag is sufficient, no need to test internal sq state */ + if (unlikely(!mlx5e_xdp_tx_is_enabled(priv))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + sq_num = smp_processor_id(); + + if (unlikely(sq_num >= priv->channels.num)) + return -ENXIO; + + sq = &priv->channels.c[sq_num]->xdpsq; + + for (i = 0; i < n; i++) { + struct mlx5e_xmit_data_frags xdptxdf = {}; + struct xdp_frame *xdpf = frames[i]; + dma_addr_t dma_arr[MAX_SKB_FRAGS]; + struct mlx5e_xmit_data *xdptxd; + bool ret; + + xdptxd = &xdptxdf.xd; + xdptxd->data = xdpf->data; + xdptxd->len = xdpf->len; + xdptxd->has_frags = xdp_frame_has_frags(xdpf); + xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data, + xdptxd->len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr))) + break; + + if (xdptxd->has_frags) { + int j; + + xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf); + xdptxdf.dma_arr = dma_arr; + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) { + skb_frag_t *frag = &xdptxdf.sinfo->frags[j]; + + dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag), + skb_frag_size(frag), DMA_TO_DEVICE); + + if (!dma_mapping_error(sq->pdev, dma_arr[j])) + continue; + /* mapping error */ + while (--j >= 0) + dma_unmap_single(sq->pdev, dma_arr[j], + skb_frag_size(&xdptxdf.sinfo->frags[j]), + DMA_TO_DEVICE); + goto out; + } + } + + ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, xdptxd, 0); + if (unlikely(!ret)) { + int j; + + dma_unmap_single(sq->pdev, xdptxd->dma_addr, + xdptxd->len, DMA_TO_DEVICE); + if (!xdptxd->has_frags) + break; + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) + dma_unmap_single(sq->pdev, dma_arr[j], + skb_frag_size(&xdptxdf.sinfo->frags[j]), + DMA_TO_DEVICE); + break; + } + + /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */ + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.xdpf = xdpf }); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr }); + if (xdptxd->has_frags) { + int j; + + for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, + (union mlx5e_xdp_info) + { .frame.dma_addr = dma_arr[j] }); + } + nxmit++; + } + +out: + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + if (flags & XDP_XMIT_FLUSH) + mlx5e_xmit_xdp_doorbell(sq); + + return nxmit; +} + +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) +{ + struct mlx5e_xdpsq *xdpsq = rq->xdpsq; + + if (xdpsq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(xdpsq); + + mlx5e_xmit_xdp_doorbell(xdpsq); + + if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) { + xdp_do_flush_map(); + __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); + } +} + +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) +{ + sq->xmit_xdp_frame_check = is_mpw ? + mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check; + sq->xmit_xdp_frame = is_mpw ? + mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h new file mode 100644 index 0000000000..ecfe93a479 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2018, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_XDP_H__ +#define __MLX5_EN_XDP_H__ + +#include + +#include "en.h" +#include "en/txrx.h" + +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) + +#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16 +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \ + (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \ + sizeof(struct mlx5_wqe_inline_seg)) + +struct mlx5e_xdp_buff { + struct xdp_buff xdp; + struct mlx5_cqe64 *cqe; + struct mlx5e_rq *rq; +}; + +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. + */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. The UMEM Completion Ring producer pointer has to be increased. + */ + MLX5E_XDP_XMIT_MODE_XSK, +}; + +/* xmit_mode entry is pushed to the fifo per packet, followed by multiple + * entries, as follows: + * + * MLX5E_XDP_XMIT_MODE_FRAME: + * xdpf, dma_addr_1, dma_addr_2, ... , dma_addr_num. + * 'num' is derived from xdpf. + * + * MLX5E_XDP_XMIT_MODE_PAGE: + * num, page_1, page_2, ... , page_num. + * + * MLX5E_XDP_XMIT_MODE_XSK: + * none. + */ +#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4 + +union mlx5e_xdp_info { + enum mlx5e_xdp_xmit_mode mode; + union { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + union { + struct mlx5e_rq *rq; + u8 num; + struct page *page; + } page; +}; + +struct mlx5e_xsk_param; +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + struct bpf_prog *prog, struct mlx5e_xdp_buff *mlctx); +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); +int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); + +extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops; + +INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, + struct mlx5e_xmit_data *xdptxd, + int check_result)); +INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, + struct mlx5e_xmit_data *xdptxd, + int check_result)); +INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)); +INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)); + +static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + + if (priv->channels.params.xdp_prog) + set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); +} + +static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) +{ + if (priv->channels.params.xdp_prog) + clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); + + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + /* Let other device's napi(s) and XSK wakeups see our new state. */ + synchronize_net(); +} + +static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + +static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); +} + +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) +{ + if (sq->doorbell_cseg) { + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg); + sq->doorbell_cseg = NULL; + } +} + +/* Enable inline WQEs to shift some load from a congested HCA (HW) to + * a less congested cpu (SW). + */ +static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) +{ + u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc; + +#define MLX5E_XDP_INLINE_WATERMARK_LOW 10 +#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128 + + if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW) + return false; + + if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH) + return true; + + return cur; +} + +static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) +{ + if (session->inline_on) + return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > + max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; + + return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs); +} + +struct mlx5e_xdp_wqe_info { + u8 num_wqebbs; + u8 num_pkts; +}; + +static inline void +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, + struct mlx5e_xmit_data *xdptxd, + struct mlx5e_xdpsq_stats *stats) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg = + (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; + u32 dma_len = xdptxd->len; + + session->pkt_count++; + session->bytes_count += dma_len; + + if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { + struct mlx5_wqe_inline_seg *inline_dseg = + (struct mlx5_wqe_inline_seg *)dseg; + u16 ds_len = sizeof(*inline_dseg) + dma_len; + u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS); + + inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); + memcpy(inline_dseg->data, xdptxd->data, dma_len); + + session->ds_count += ds_cnt; + stats->inlnw++; + return; + } + + dseg->addr = cpu_to_be64(xdptxd->dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + session->ds_count++; +} + +static inline void +mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo, + union mlx5e_xdp_info xi) +{ + u32 i = (*fifo->pc)++ & fifo->mask; + + fifo->xi[i] = xi; +} + +static inline union mlx5e_xdp_info +mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) +{ + return fifo->xi[(*fifo->cc)++ & fifo->mask]; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c new file mode 100644 index 0000000000..ebada0c5af --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ + +#include +#include "pool.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) +{ + struct device *dev = mlx5_core_dma_dev(priv->mdev); + + return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); +} + +static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); +} + +static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) +{ + if (!xsk->pools) { + xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->pools), GFP_KERNEL); + if (unlikely(!xsk->pools)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->pools); + xsk->pools = NULL; + } +} + +static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_pools(xsk); + if (unlikely(err)) + return err; + + xsk->pools[ix] = pool; + return 0; +} + +static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->pools[ix] = NULL; + + mlx5e_xsk_put_pools(xsk); +} + +static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool) +{ + return xsk_pool_get_headroom(pool) <= 0xffff && + xsk_pool_get_chunk_size(pool) <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = xsk_pool_get_headroom(pool); + xsk->chunk_size = xsk_pool_get_chunk_size(pool); + xsk->unaligned = pool->unaligned; +} + +static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_pool_sane(pool))) + return -EINVAL; + + err = mlx5e_xsk_map_pool(priv, pool); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix); + if (unlikely(err)) + goto err_unmap_pool; + + mlx5e_build_xsk_param(pool, &xsk); + + if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) { + const char *recommendation = is_power_of_2(xsk.chunk_size) ? + "Upgrade firmware" : "Disable striding RQ"; + + mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n", + xsk.chunk_size, recommendation); + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. + */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, pool, c); + if (unlikely(err)) + goto err_remove_pool; + + mlx5e_activate_xsk(c); + mlx5e_trigger_napi_icosq(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. + */ + + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true); + + mlx5e_deactivate_rq(&c->rq); + mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY); + + return 0; + +err_remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); + +err_unmap_pool: + mlx5e_xsk_unmap_pool(priv, pool); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. + */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_pool; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!pool)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_pool; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_pool; + + c = priv->channels.c[ix]; + + mlx5e_activate_rq(&c->rq); + mlx5e_trigger_napi_icosq(c); + mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT); + + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false); + + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); + mlx5e_xsk_unmap_pool(priv, pool); + + return 0; +} + +static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, pool, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + + if (unlikely(qid >= params->num_channels)) + return -EINVAL; + + return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) : + mlx5e_xsk_disable_pool(priv, qid); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h new file mode 100644 index 0000000000..dca0010a08 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_XSK_POOL_H__ +#define __MLX5_EN_XSK_POOL_H__ + +#include "en.h" + +static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->pools) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->pools[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid); + +#endif /* __MLX5_EN_XSK_POOL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c new file mode 100644 index 0000000000..b8dd744536 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rx.h" +#include "en/xdp.h" +#include +#include + +/* RX data path */ + +static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp) +{ + /* mlx5e_xdp_buff shares its layout with xdp_buff_xsk + * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb + */ + return (struct mlx5e_xdp_buff *)xdp; +} + +int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + struct mlx5e_icosq *icosq = rq->icosq; + struct mlx5_wq_cyc *wq = &icosq->wq; + struct mlx5e_umr_wqe *umr_wqe; + struct xdp_buff **xsk_buffs; + int batch, i; + u32 offset; /* 17-bit value with MTT. */ + u16 pi; + + if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) + goto err; + + XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff); + xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs; + batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs, + rq->mpwqe.pages_per_wqe); + + /* If batch < pages_per_wqe, either: + * 1. Some (or all) descriptors were invalid. + * 2. dma_need_sync is true, and it fell back to allocating one frame. + * In either case, try to continue allocating frames one by one, until + * the first error, which will mean there are no more valid descriptors. + */ + for (; batch < rq->mpwqe.pages_per_wqe; batch++) { + xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!xsk_buffs[batch])) + goto err_reuse_batch; + } + + pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs); + umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); + + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { + for (i = 0; i < batch; i++) { + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); + + umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(addr | MLX5_EN_WR), + }; + mxbuf->rq = rq; + } + } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { + for (i = 0; i < batch; i++) { + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); + + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + mxbuf->rq = rq; + } + } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) { + u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); + + for (i = 0; i < batch; i++) { + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); + + umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size), + }; + umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size * 2), + }; + umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + }; + mxbuf->rq = rq; + } + } else { + __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) - + rq->xsk_pool->chunk_size); + __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); + + for (i = 0; i < batch; i++) { + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); + + umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + .bcount = frame_size, + }; + umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + .bcount = pad_size, + }; + mxbuf->rq = rq; + } + } + + bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + wi->consumed_strides = 0; + + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); + + /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */ + offset = ix * rq->mpwqe.mtts_per_wqe; + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED)) + offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) + offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + + icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, + .num_wqebbs = rq->mpwqe.umr_wqebbs, + .umr.rq = rq, + }; + + icosq->pc += rq->mpwqe.umr_wqebbs; + + icosq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_reuse_batch: + while (--batch >= 0) + xsk_buff_free(xsk_buffs[batch]); + +err: + rq->stats->buff_alloc_err++; + return -ENOMEM; +} + +int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct xdp_buff **buffs; + u32 contig, alloc; + int i; + + /* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the + * rq->wqe.alloc_units->xsk_buffs array allocated here. + */ + buffs = rq->wqe.alloc_units->xsk_buffs; + contig = mlx5_wq_cyc_get_size(wq) - ix; + if (wqe_bulk <= contig) { + alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk); + } else { + alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig); + if (likely(alloc == contig)) + alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig); + } + + for (i = 0; i < alloc; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + struct mlx5e_rx_wqe_cyc *wqe; + dma_addr_t addr; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + /* Assumes log_num_frags == 0. */ + frag = &rq->wqe.frags[j]; + + addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); + wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } + + return alloc; +} + +int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + struct mlx5e_rx_wqe_cyc *wqe; + dma_addr_t addr; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + /* Assumes log_num_frags == 0. */ + frag = &rq->wqe.frags[j]; + + *frag->xskp = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!*frag->xskp)) + return i; + + addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); + wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } + + return wqe_bulk; +} + +static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp) +{ + u32 totallen = xdp->data_end - xdp->data_meta; + u32 metalen = xdp->data - xdp->data_meta; + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, totallen); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_put_data(skb, xdp->data_meta, totallen); + + if (metalen) { + skb_metadata_set(skb, metalen); + __skb_pull(skb, metalen); + } + + return skb; +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx) +{ + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]); + struct bpf_prog *prog; + + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + /* head_offset is not used in this function, because xdp->data and the + * DMA address point directly to the necessary place. Furthermore, in + * the current implementation, UMR pages are mapped to XSK frames, so + * head_offset should always be 0. + */ + WARN_ON_ONCE(head_offset); + + /* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */ + mxbuf->cqe = cqe; + xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt); + xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool); + net_prefetch(mxbuf->xdp.data); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) +{ + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp); + struct bpf_prog *prog; + + /* wi->offset is not used in this function, because xdp->data and the + * DMA address point directly to the necessary place. Furthermore, the + * XSK allocator allocates frames per packet, instead of pages, so + * wi->offset should always be 0. + */ + WARN_ON_ONCE(wi->offset); + + /* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */ + mxbuf->cqe = cqe; + xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt); + xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool); + net_prefetch(mxbuf->xdp.data); + + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_free_rx_wqe. + * On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 0000000000..cefc0ef610 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); +int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); +int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 0000000000..36826b5824 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" +#include "en/txrx.h" +#include "en/health.h" +#include + +static int mlx5e_legacy_rq_validate_xsk(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + if (!mlx5e_rx_is_linear_skb(mdev, params, xsk)) { + mlx5_core_err(mdev, "Legacy RQ linear mode for XSK can't be activated with current params\n"); + return -EINVAL; + } + + return 0; +} + +/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal + * stride size of striding RQ. + */ +#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE) + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ + if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) { + mlx5_core_err(mdev, "XSK chunk size %u out of bounds [%u, %lu]\n", xsk->chunk_size, + MLX5E_MIN_XSK_CHUNK_SIZE, PAGE_SIZE); + return false; + } + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. + */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return !mlx5e_legacy_rq_validate_xsk(mdev, params, xsk); + } +} + +static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_channel_param *cparam) +{ + mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq); + mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq); +} + +static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int rq_xdp_ix; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->channel = c; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->xsk_pool = pool; + rq->stats = &c->priv->channel_stats[c->ix]->xskrq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + rq_xdp_ix = c->ix; + err = mlx5e_rq_set_handlers(rq, params, xsk); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, c->napi.napi_id); +} + +static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk) +{ + struct mlx5e_rq *xskrq = &c->xskrq; + int err; + + err = mlx5e_init_xsk_rq(c, params, pool, xsk, xskrq); + if (err) + return err; + + err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), xskrq); + if (err) + return err; + + __set_bit(MLX5E_RQ_STATE_XSK, &xskrq->state); + return 0; +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param *cparam; + struct mlx5e_create_cq_param ccp; + int err; + + mlx5e_build_create_cq_param(&ccp, c); + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!cparam) + return -ENOMEM; + + mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam); + + err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, + &c->xskrq.cq); + if (unlikely(err)) + goto err_free_cparam; + + err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp, + &c->xsksq.cq); + if (unlikely(err)) + goto err_close_rq; + + /* Create a separate SQ, so that when the buff pool is disabled, we could + * close this SQ safely and stop receiving CQEs. In other case, e.g., if + * the XDPSQ was used instead, we might run into trouble when the buff pool + * is disabled and then re-enabled, but the SQ continues receiving CQEs + * from the old buff pool. + */ + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true); + if (unlikely(err)) + goto err_close_tx_cq; + + kvfree(cparam); + + set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + return 0; + +err_close_tx_cq: + mlx5e_close_cq(&c->xsksq.cq); + +err_close_rq: + mlx5e_close_rq(&c->xskrq); + +err_close_rx_cq: + mlx5e_close_cq(&c->xskrq.cq); + +err_free_cparam: + kvfree(cparam); + + return err; +} + +void mlx5e_close_xsk(struct mlx5e_channel *c) +{ + clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + synchronize_net(); /* Sync with NAPI. */ + + mlx5e_close_rq(&c->xskrq); + mlx5e_close_cq(&c->xskrq.cq); + mlx5e_close_xdpsq(&c->xsksq); + mlx5e_close_cq(&c->xsksq.cq); + + memset(&c->xskrq, 0, sizeof(c->xskrq)); + memset(&c->xsksq, 0, sizeof(c->xsksq)); +} + +void mlx5e_activate_xsk(struct mlx5e_channel *c) +{ + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + + /* TX queue is created active. */ +} + +void mlx5e_deactivate_xsk(struct mlx5e_channel *c) +{ + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + + /* TX queue is disabled on close. */ +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 0000000000..50e111b85e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 0000000000..597f319d47 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "tx.h" +#include "pool.h" +#include "en/xdp.h" +#include "en/params.h" +#include + +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + + if (unlikely(!mlx5e_xdp_is_active(priv))) + return -ENETDOWN; + + if (unlikely(qid >= params->num_channels)) + return -EINVAL; + + c = priv->channels.c[qid]; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + /* To avoid WQE overrun, don't post a NOP if async_icosq is not + * active and not polled by NAPI. Return 0, because the upcoming + * activate will trigger the IRQ for us. + */ + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->async_icosq.state))) + return 0; + + if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state)) + return 0; + + mlx5e_trigger_napi_icosq(c); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + union mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, *xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xsk_buff_pool *pool = sq->xsk_pool; + union mlx5e_xdp_info xdpi; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = INDIRECT_CALL_2(sq->xmit_xdp_frame_check, + mlx5e_xmit_xdp_frame_check_mpwqe, + mlx5e_xmit_xdp_frame_check, + sq); + struct mlx5e_xmit_data xdptxd = {}; + struct xdp_desc desc; + bool ret; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_tx_peek_desc(pool, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr); + xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr); + xdptxd.len = desc.len; + + xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len); + + ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, &xdptxd, + check_result); + if (unlikely(!ret)) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } else { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_tx_release(pool); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 0000000000..9c505158b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h new file mode 100644 index 0000000000..caa34b9c16 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_EN_ACCEL_H__ +#define __MLX5E_EN_ACCEL_H__ + +#include +#include +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/ktls.h" +#include "en_accel/ktls_txrx.h" +#include +#include "en.h" +#include "en/txrx.h" + +#if IS_ENABLED(CONFIG_GENEVE) +#include + +static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) +{ + return mlx5_tx_swp_supported(mdev); +} + +static inline void +mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ihs) +{ + struct mlx5e_swp_spec swp_spec = {}; + unsigned int offset = 0; + __be16 l3_proto; + u8 l4_proto; + + l3_proto = vlan_get_protocol(skb); + switch (l3_proto) { + case htons(ETH_P_IP): + l4_proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); + break; + default: + return; + } + + if (l4_proto != IPPROTO_UDP || + udp_hdr(skb)->dest != cpu_to_be16(GENEVE_UDP_PORT)) + return; + swp_spec.l3_proto = l3_proto; + swp_spec.l4_proto = l4_proto; + swp_spec.is_tun = true; + if (inner_ip_hdr(skb)->version == 6) { + swp_spec.tun_l3_proto = htons(ETH_P_IPV6); + swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr; + } else { + swp_spec.tun_l3_proto = htons(ETH_P_IP); + swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol; + } + + mlx5e_set_eseg_swp(skb, eseg, &swp_spec); + if (skb_vlan_tag_present(skb) && ihs) + mlx5e_eseg_swp_offsets_add_vlan(eseg); +} + +#else +static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) +{ + return false; +} + +#endif /* CONFIG_GENEVE */ + +static inline void +mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb) +{ + int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr); + + udp_hdr(skb)->len = htons(payload_len); +} + +struct mlx5e_accel_tx_state { +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_accel_tx_tls_state tls; +#endif +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5e_accel_tx_ipsec_state ipsec; +#endif +}; + +static inline bool mlx5e_accel_tx_begin(struct net_device *dev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_accel_tx_state *state) +{ + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + mlx5e_udp_gso_handle_tx_skb(skb); + +#ifdef CONFIG_MLX5_EN_TLS + /* May send WQEs. */ + if (tls_is_skb_tx_device_offloaded(skb)) + if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb, + &state->tls))) + return false; +#endif + +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) { + if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec))) + return false; + } +#endif + +#ifdef CONFIG_MLX5_MACSEC + if (unlikely(mlx5e_macsec_skb_is_offload(skb))) { + struct mlx5e_priv *priv = netdev_priv(dev); + + if (unlikely(!mlx5e_macsec_handle_tx_skb(priv->macsec, skb))) + return false; + } +#endif + + return true; +} + +static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, + struct mlx5e_accel_tx_state *state) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) + return mlx5e_ipsec_tx_ids_len(&state->ipsec); +#endif + + return 0; +} + +/* Part of the eseg touched by TX offloads */ +#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss) + +static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, u16 ihs) +{ +#ifdef CONFIG_MLX5_EN_IPSEC + if (xfrm_offload(skb)) + mlx5e_ipsec_tx_build_eseg(priv, skb, eseg); +#endif + +#ifdef CONFIG_MLX5_MACSEC + if (unlikely(mlx5e_macsec_skb_is_offload(skb))) + mlx5e_macsec_tx_build_eseg(priv->macsec, skb, eseg); +#endif + +#if IS_ENABLED(CONFIG_GENEVE) + if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) + mlx5e_tx_tunnel_accel(skb, eseg, ihs); +#endif +} + +static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_state *state, + struct mlx5_wqe_inline_seg *inlseg) +{ +#ifdef CONFIG_MLX5_EN_TLS + mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls); +#endif + +#ifdef CONFIG_MLX5_EN_IPSEC + if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && + state->ipsec.xo && state->ipsec.tailen) + mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg); +#endif +} + +static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv) +{ + return mlx5e_ktls_init_rx(priv); +} + +static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv) +{ + mlx5e_ktls_cleanup_rx(priv); +} + +static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv) +{ + return mlx5e_ktls_init_tx(priv); +} + +static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv) +{ + mlx5e_ktls_cleanup_tx(priv); +} +#endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c new file mode 100644 index 0000000000..c7d191f66a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include +#include "en_accel/fs_tcp.h" +#include "fs_core.h" + +enum accel_fs_tcp_type { + ACCEL_FS_IPV4_TCP, + ACCEL_FS_IPV6_TCP, + ACCEL_FS_TCP_NUM_TYPES, +}; + +struct mlx5e_accel_fs_tcp { + struct mlx5e_flow_table tables[ACCEL_FS_TCP_NUM_TYPES]; + struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES]; +}; + +static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i) +{ + switch (i) { + case ACCEL_FS_IPV4_TCP: + return MLX5_TT_IPV4_TCP; + default: /* ACCEL_FS_IPV6_TCP */ + return MLX5_TT_IPV6_TCP; + } +} + +static void accel_fs_tcp_set_ipv4_flow(struct mlx5_flow_spec *spec, struct sock *sk) +{ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_TCP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &inet_sk(sk)->inet_daddr, 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &inet_sk(sk)->inet_rcv_saddr, 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock *sk) +{ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_TCP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &sk->sk_v6_daddr, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &inet6_sk(sk)->saddr, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); +} +#endif + +void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, + struct sock *sk, u32 tirn, + uint32_t flow_tag) +{ + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_flow_destination dest = {}; + struct mlx5e_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + switch (sk->sk_family) { + case AF_INET: + accel_fs_tcp_set_ipv4_flow(spec, sk); + ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP]; + fs_dbg(fs, "%s flow is %pI4:%d -> %pI4:%d\n", __func__, + &inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_sport, + &inet_sk(sk)->inet_daddr, + inet_sk(sk)->inet_dport); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (!ipv6_only_sock(sk) && + ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { + accel_fs_tcp_set_ipv4_flow(spec, sk); + ft = &fs_tcp->tables[ACCEL_FS_IPV4_TCP]; + } else { + accel_fs_tcp_set_ipv6_flow(spec, sk); + ft = &fs_tcp->tables[ACCEL_FS_IPV6_TCP]; + } + break; +#endif + default: + break; + } + + if (!ft) { + flow = ERR_PTR(-EINVAL); + goto out; + } + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport, + ntohs(inet_sk(sk)->inet_sport)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport, + ntohs(inet_sk(sk)->inet_dport)); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG) { + spec->flow_context.flow_tag = flow_tag; + spec->flow_context.flags = FLOW_CONTEXT_HAS_TAG; + } + + flow = mlx5_add_flow_rules(ft->t, spec, &flow_act, &dest, 1); + + if (IS_ERR(flow)) + fs_err(fs, "mlx5_add_flow_rules() failed, flow is %ld\n", PTR_ERR(flow)); + +out: + kvfree(spec); + return flow; +} + +static int accel_fs_tcp_add_default_rule(struct mlx5e_flow_steering *fs, + enum accel_fs_tcp_type type) +{ + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_flow_table *accel_fs_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err = 0; + + accel_fs_t = &fs_tcp->tables[type]; + + dest = mlx5_ttc_get_default_dest(ttc, fs_accel2tt(type)); + rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs_err(fs, "%s: add default rule failed, accel_fs type=%d, err %d\n", + __func__, type, err); + return err; + } + + fs_tcp->default_rules[type] = rule; + return 0; +} + +#define MLX5E_ACCEL_FS_TCP_NUM_GROUPS (2) +#define MLX5E_ACCEL_FS_TCP_GROUP1_SIZE (BIT(16) - 1) +#define MLX5E_ACCEL_FS_TCP_GROUP2_SIZE (BIT(0)) +#define MLX5E_ACCEL_FS_TCP_TABLE_SIZE (MLX5E_ACCEL_FS_TCP_GROUP1_SIZE +\ + MLX5E_ACCEL_FS_TCP_GROUP2_SIZE) +static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, + enum accel_fs_tcp_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + ft->g = NULL; + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version); + + switch (type) { + case ACCEL_FS_IPV4_TCP: + case ACCEL_FS_IPV6_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport); + break; + default: + err = -EINVAL; + goto out; + } + + switch (type) { + case ACCEL_FS_IPV4_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + break; + case ACCEL_FS_IPV6_TCP: + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + break; + default: + err = -EINVAL; + goto out; + } + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ACCEL_FS_TCP_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ACCEL_FS_TCP_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int accel_fs_tcp_create_table(struct mlx5e_flow_steering *fs, enum accel_fs_tcp_type type) +{ + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_flow_table *ft = &accel_tcp->tables[type]; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_ACCEL_FS_TCP_TABLE_SIZE; + ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + fs_dbg(fs, "Created fs accel table id %u level %u\n", + ft->t->id, ft->t->level); + + err = accel_fs_tcp_create_groups(ft, type); + if (err) + goto err; + + err = accel_fs_tcp_add_default_rule(fs, type); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static int accel_fs_tcp_disable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + int err, i; + + for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5_ttc_fwd_default_dest(ttc, fs_accel2tt(i)); + if (err) { + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, fs_accel2tt(i), err); + return err; + } + } + + return 0; +} + +static int accel_fs_tcp_enable(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5_flow_destination dest = {}; + int err, i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { + dest.ft = accel_tcp->tables[i].t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5_ttc_fwd_dest(ttc, fs_accel2tt(i), &dest); + if (err) { + fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, fs_accel2tt(i), err); + return err; + } + } + return 0; +} + +static void accel_fs_tcp_destroy_table(struct mlx5e_flow_steering *fs, int i) +{ + struct mlx5e_accel_fs_tcp *fs_tcp = mlx5e_fs_get_accel_tcp(fs); + + if (IS_ERR_OR_NULL(fs_tcp->tables[i].t)) + return; + + mlx5_del_flow_rules(fs_tcp->default_rules[i]); + mlx5e_destroy_flow_table(&fs_tcp->tables[i]); + fs_tcp->tables[i].t = NULL; +} + +void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_accel_fs_tcp *accel_tcp = mlx5e_fs_get_accel_tcp(fs); + int i; + + if (!accel_tcp) + return; + + accel_fs_tcp_disable(fs); + + for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) + accel_fs_tcp_destroy_table(fs, i); + + kfree(accel_tcp); + mlx5e_fs_set_accel_tcp(fs, NULL); +} + +int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_accel_fs_tcp *accel_tcp; + int i, err; + + if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version)) + return -EOPNOTSUPP; + + accel_tcp = kzalloc(sizeof(*accel_tcp), GFP_KERNEL); + if (!accel_tcp) + return -ENOMEM; + mlx5e_fs_set_accel_tcp(fs, accel_tcp); + + for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { + err = accel_fs_tcp_create_table(fs, i); + if (err) + goto err_destroy_tables; + } + + err = accel_fs_tcp_enable(fs); + if (err) + goto err_destroy_tables; + + return 0; + +err_destroy_tables: + while (--i >= 0) + accel_fs_tcp_destroy_table(fs, i); + kfree(accel_tcp); + mlx5e_fs_set_accel_tcp(fs, NULL); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h new file mode 100644 index 0000000000..a032bff482 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_ACCEL_FS_TCP_H__ +#define __MLX5E_ACCEL_FS_TCP_H__ + +#include "en/fs.h" + +#ifdef CONFIG_MLX5_EN_TLS +int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs); +void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs); +struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, + struct sock *sk, u32 tirn, + uint32_t flow_tag); +void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule); +#else +static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) { return 0; } +static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) {} +static inline struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, + struct sock *sk, u32 tirn, + uint32_t flow_tag) +{ return ERR_PTR(-EOPNOTSUPP); } +static inline void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule) {} +#endif + +#endif /* __MLX5E_ACCEL_FS_TCP_H__ */ + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c new file mode 100644 index 0000000000..e2ffc572de --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -0,0 +1,1191 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include +#include + +#include "en.h" +#include "eswitch.h" +#include "ipsec.h" +#include "ipsec_rxtx.h" +#include "en_rep.h" + +#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000) +#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1 + +static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) +{ + return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; +} + +static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x) +{ + return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle; +} + +static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work) +{ + struct mlx5e_ipsec_dwork *dwork = + container_of(_work, struct mlx5e_ipsec_dwork, dwork.work); + struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry; + struct xfrm_state *x = sa_entry->x; + + if (sa_entry->attrs.drop) + return; + + spin_lock_bh(&x->lock); + xfrm_state_check_expire(x); + if (x->km.state == XFRM_STATE_EXPIRED) { + sa_entry->attrs.drop = true; + spin_unlock_bh(&x->lock); + + mlx5e_accel_ipsec_fs_modify(sa_entry); + return; + } + spin_unlock_bh(&x->lock); + + queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, + MLX5_IPSEC_RESCHED); +} + +static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + u32 seq_bottom = 0; + u32 esn, esn_msb; + u8 overlap; + + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_PACKET: + switch (x->xso.dir) { + case XFRM_DEV_OFFLOAD_IN: + esn = x->replay_esn->seq; + esn_msb = x->replay_esn->seq_hi; + break; + case XFRM_DEV_OFFLOAD_OUT: + esn = x->replay_esn->oseq; + esn_msb = x->replay_esn->oseq_hi; + break; + default: + WARN_ON(true); + return false; + } + break; + case XFRM_DEV_OFFLOAD_CRYPTO: + /* Already parsed by XFRM core */ + esn = x->replay_esn->seq; + break; + default: + WARN_ON(true); + return false; + } + + overlap = sa_entry->esn_state.overlap; + + if (esn >= x->replay_esn->replay_window) + seq_bottom = esn - x->replay_esn->replay_window + 1; + + if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) + esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); + + if (sa_entry->esn_state.esn_msb) + sa_entry->esn_state.esn = esn; + else + /* According to RFC4303, section "3.3.3. Sequence Number Generation", + * the first packet sent using a given SA will contain a sequence + * number of 1. + */ + sa_entry->esn_state.esn = max_t(u32, esn, 1); + sa_entry->esn_state.esn_msb = esn_msb; + + if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { + sa_entry->esn_state.overlap = 0; + return true; + } else if (unlikely(!overlap && + (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) { + sa_entry->esn_state.overlap = 1; + return true; + } + + return false; +} + +static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct xfrm_state *x = sa_entry->x; + s64 start_value, n; + + attrs->lft.hard_packet_limit = x->lft.hard_packet_limit; + attrs->lft.soft_packet_limit = x->lft.soft_packet_limit; + if (x->lft.soft_packet_limit == XFRM_INF) + return; + + /* Compute hard limit initial value and number of rounds. + * + * The counting pattern of hardware counter goes: + * value -> 2^31-1 + * 2^31 | (2^31-1) -> 2^31-1 + * 2^31 | (2^31-1) -> 2^31-1 + * [..] + * 2^31 | (2^31-1) -> 0 + * + * The pattern is created by using an ASO operation to atomically set + * bit 31 after the down counter clears bit 31. This is effectively an + * atomic addition of 2**31 to the counter. + * + * We wish to configure the counter, within the above pattern, so that + * when it reaches 0, it has hit the hard limit. This is defined by this + * system of equations: + * + * hard_limit == start_value + n * 2^31 + * n >= 0 + * start_value < 2^32, start_value >= 0 + * + * These equations are not single-solution, there are often two choices: + * hard_limit == start_value + n * 2^31 + * hard_limit == (start_value+2^31) + (n-1) * 2^31 + * + * The algorithm selects the solution that keeps the counter value + * above 2^31 until the final iteration. + */ + + /* Start by estimating n and compute start_value */ + n = attrs->lft.hard_packet_limit / BIT_ULL(31); + start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); + + /* Choose the best of the two solutions: */ + if (n >= 1) + n -= 1; + + /* Computed values solve the system of equations: */ + start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); + + /* The best solution means: when there are multiple iterations we must + * start above 2^31 and count down to 2**31 to get the interrupt. + */ + attrs->lft.hard_packet_limit = lower_32_bits(start_value); + attrs->lft.numb_rounds_hard = (u64)n; + + /* Compute soft limit initial value and number of rounds. + * + * The soft_limit is achieved by adjusting the counter's + * interrupt_value. This is embedded in the counting pattern created by + * hard packet calculations above. + * + * We wish to compute the interrupt_value for the soft_limit. This is + * defined by this system of equations: + * + * soft_limit == start_value - soft_value + n * 2^31 + * n >= 0 + * soft_value < 2^32, soft_value >= 0 + * for n == 0 start_value > soft_value + * + * As with compute_hard_n_value() the equations are not single-solution. + * The algorithm selects the solution that has: + * 2^30 <= soft_limit < 2^31 + 2^30 + * for the interior iterations, which guarantees a large guard band + * around the counter hard limit and next interrupt. + */ + + /* Start by estimating n and compute soft_value */ + n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31); + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - + x->lft.soft_packet_limit; + + /* Compare against constraints and adjust n */ + if (n < 0) + n = 0; + else if (start_value >= BIT_ULL(32)) + n -= 1; + else if (start_value < 0) + n += 1; + + /* Choose the best of the two solutions: */ + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; + if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30)) + n += 1; + + /* Note that the upper limit of soft_value happens naturally because we + * always select the lowest soft_value. + */ + + /* Computed values solve the system of equations: */ + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; + + /* The best solution means: when there are multiple iterations we must + * not fall below 2^30 as that would get too close to the false + * hard_limit and when we reach an interior iteration for soft_limit it + * has to be far away from 2**32-1 which is the counter reset point + * after the +2^31 to accommodate latency. + */ + attrs->lft.soft_packet_limit = lower_32_bits(start_value); + attrs->lft.numb_rounds_soft = (u64)n; +} + +static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct xfrm_state *x = sa_entry->x; + struct net_device *netdev; + struct neighbour *n; + u8 addr[ETH_ALEN]; + const void *pkey; + u8 *dst, *src; + + if (attrs->mode != XFRM_MODE_TUNNEL || + attrs->type != XFRM_DEV_OFFLOAD_PACKET) + return; + + netdev = x->xso.real_dev; + + mlx5_query_mac_address(mdev, addr); + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + src = attrs->dmac; + dst = attrs->smac; + pkey = &attrs->saddr.a4; + break; + case XFRM_DEV_OFFLOAD_OUT: + src = attrs->smac; + dst = attrs->dmac; + pkey = &attrs->daddr.a4; + break; + default: + return; + } + + ether_addr_copy(src, addr); + n = neigh_lookup(&arp_tbl, pkey, netdev); + if (!n) { + n = neigh_create(&arp_tbl, pkey, netdev); + if (IS_ERR(n)) + return; + neigh_event_send(n, NULL); + attrs->drop = true; + } else { + neigh_ha_snapshot(addr, n, netdev); + ether_addr_copy(dst, addr); + } + neigh_release(n); +} + +void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct xfrm_state *x = sa_entry->x; + struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm; + struct aead_geniv_ctx *geniv_ctx; + struct crypto_aead *aead; + unsigned int crypto_data_len, key_len; + int ivsize; + + memset(attrs, 0, sizeof(*attrs)); + + /* key */ + crypto_data_len = (x->aead->alg_key_len + 7) / 8; + key_len = crypto_data_len - 4; /* 4 bytes salt at end */ + + memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len); + aes_gcm->key_len = key_len * 8; + + /* salt and seq_iv */ + aead = x->data; + geniv_ctx = crypto_aead_ctx(aead); + ivsize = crypto_aead_ivsize(aead); + memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize); + memcpy(&aes_gcm->salt, x->aead->alg_key + key_len, + sizeof(aes_gcm->salt)); + + attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */ + + /* iv len */ + aes_gcm->icv_len = x->aead->alg_icv_len; + + attrs->dir = x->xso.dir; + + /* esn */ + if (x->props.flags & XFRM_STATE_ESN) { + attrs->replay_esn.trigger = true; + attrs->replay_esn.esn = sa_entry->esn_state.esn; + attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; + attrs->replay_esn.overlap = sa_entry->esn_state.overlap; + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) + goto skip_replay_window; + + switch (x->replay_esn->replay_window) { + case 32: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT; + break; + case 64: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT; + break; + case 128: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT; + break; + case 256: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT; + break; + default: + WARN_ON(true); + return; + } + } + +skip_replay_window: + /* spi */ + attrs->spi = be32_to_cpu(x->id.spi); + + /* source , destination ips */ + memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr)); + memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr)); + attrs->family = x->props.family; + attrs->type = x->xso.type; + attrs->reqid = x->props.reqid; + attrs->upspec.dport = ntohs(x->sel.dport); + attrs->upspec.dport_mask = ntohs(x->sel.dport_mask); + attrs->upspec.sport = ntohs(x->sel.sport); + attrs->upspec.sport_mask = ntohs(x->sel.sport_mask); + attrs->upspec.proto = x->sel.proto; + attrs->mode = x->props.mode; + + mlx5e_ipsec_init_limits(sa_entry, attrs); + mlx5e_ipsec_init_macs(sa_entry, attrs); + + if (x->encap) { + attrs->encap = true; + attrs->sport = x->encap->encap_sport; + attrs->dport = x->encap->encap_dport; + } +} + +static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, + struct xfrm_state *x, + struct netlink_ext_ack *extack) +{ + if (x->props.aalgo != SADB_AALG_NONE) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states"); + return -EINVAL; + } + if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { + NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded"); + return -EINVAL; + } + if (x->props.calgo != SADB_X_CALG_NONE) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states"); + return -EINVAL; + } + if (x->props.flags & XFRM_STATE_ESN && + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states"); + return -EINVAL; + } + if (x->props.family != AF_INET && + x->props.family != AF_INET6) { + NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded"); + return -EINVAL; + } + if (x->id.proto != IPPROTO_ESP) { + NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded"); + return -EINVAL; + } + if (x->encap) { + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) { + NL_SET_ERR_MSG_MOD(extack, "Encapsulation is not supported"); + return -EINVAL; + } + + if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) { + NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported"); + return -EINVAL; + } + + if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) { + NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only"); + return -EINVAL; + } + + if (x->props.mode != XFRM_MODE_TRANSPORT) { + NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only"); + return -EINVAL; + } + } + if (!x->aead) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead"); + return -EINVAL; + } + if (x->aead->alg_icv_len != 128) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit"); + return -EINVAL; + } + if ((x->aead->alg_key_len != 128 + 32) && + (x->aead->alg_key_len != 256 + 32)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit"); + return -EINVAL; + } + if (x->tfcpad) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding"); + return -EINVAL; + } + if (!x->geniv) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv"); + return -EINVAL; + } + if (strcmp(x->geniv, "seqiv")) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv"); + return -EINVAL; + } + + if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP && + x->sel.proto != IPPROTO_TCP) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP"); + return -EINVAL; + } + + if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) { + NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded"); + return -EINVAL; + } + + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) { + NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported"); + return -EINVAL; + } + + break; + case XFRM_DEV_OFFLOAD_PACKET: + if (!(mlx5_ipsec_device_caps(mdev) & + MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); + return -EINVAL; + } + + if (x->props.mode == XFRM_MODE_TUNNEL && + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode"); + return -EINVAL; + } + + if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN && + x->replay_esn->replay_window != 32 && + x->replay_esn->replay_window != 64 && + x->replay_esn->replay_window != 128 && + x->replay_esn->replay_window != 256) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size"); + return -EINVAL; + } + + if (!x->props.reqid) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid"); + return -EINVAL; + } + + if (x->lft.hard_byte_limit != XFRM_INF || + x->lft.soft_byte_limit != XFRM_INF) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support limits in bytes"); + return -EINVAL; + } + + if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit && + x->lft.hard_packet_limit != XFRM_INF) { + /* XFRM stack doesn't prevent such configuration :(. */ + NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one"); + return -EINVAL; + } + + if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) { + NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0"); + return -EINVAL; + } + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); + return -EINVAL; + } + return 0; +} + +static void mlx5e_ipsec_modify_state(struct work_struct *_work) +{ + struct mlx5e_ipsec_work *work = + container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; + struct mlx5_accel_esp_xfrm_attrs *attrs; + + attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs; + + mlx5_accel_esp_modify_xfrm(sa_entry, attrs); +} + +static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + + if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO || + x->xso.dir != XFRM_DEV_OFFLOAD_OUT) + return; + + if (x->props.flags & XFRM_STATE_ESN) { + sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn; + return; + } + + sa_entry->set_iv_op = mlx5e_ipsec_set_iv; +} + +static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work) +{ + struct mlx5e_ipsec_work *work = + container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; + struct mlx5e_ipsec_netevent_data *data = work->data; + struct mlx5_accel_esp_xfrm_attrs *attrs; + + attrs = &sa_entry->attrs; + + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + ether_addr_copy(attrs->smac, data->addr); + break; + case XFRM_DEV_OFFLOAD_OUT: + ether_addr_copy(attrs->dmac, data->addr); + break; + default: + WARN_ON_ONCE(true); + } + attrs->drop = false; + mlx5e_accel_ipsec_fs_modify(sa_entry); +} + +static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + struct mlx5e_ipsec_work *work; + void *data = NULL; + + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + break; + case XFRM_DEV_OFFLOAD_PACKET: + if (x->props.mode != XFRM_MODE_TUNNEL) + return 0; + break; + default: + break; + } + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + data = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!data) + goto free_work; + + INIT_WORK(&work->work, mlx5e_ipsec_modify_state); + break; + case XFRM_DEV_OFFLOAD_PACKET: + data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data), + GFP_KERNEL); + if (!data) + goto free_work; + + INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event); + break; + default: + break; + } + + work->data = data; + work->sa_entry = sa_entry; + sa_entry->work = work; + return 0; + +free_work: + kfree(work); + return -ENOMEM; +} + +static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + struct mlx5e_ipsec_dwork *dwork; + + if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) + return 0; + + if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT) + return 0; + + if (x->lft.soft_packet_limit == XFRM_INF && + x->lft.hard_packet_limit == XFRM_INF) + return 0; + + dwork = kzalloc(sizeof(*dwork), GFP_KERNEL); + if (!dwork) + return -ENOMEM; + + dwork->sa_entry = sa_entry; + INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit); + sa_entry->dwork = dwork; + return 0; +} + +static int mlx5e_xfrm_add_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = NULL; + struct net_device *netdev = x->xso.real_dev; + struct mlx5e_ipsec *ipsec; + struct mlx5e_priv *priv; + gfp_t gfp; + int err; + + priv = netdev_priv(netdev); + if (!priv->ipsec) + return -EOPNOTSUPP; + + ipsec = priv->ipsec; + gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL; + sa_entry = kzalloc(sizeof(*sa_entry), gfp); + if (!sa_entry) + return -ENOMEM; + + sa_entry->x = x; + sa_entry->ipsec = ipsec; + /* Check if this SA is originated from acquire flow temporary SA */ + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + goto out; + + err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); + if (err) + goto err_xfrm; + + if (!mlx5_eswitch_block_ipsec(priv->mdev)) { + err = -EBUSY; + goto err_xfrm; + } + + /* check esn */ + if (x->props.flags & XFRM_STATE_ESN) + mlx5e_ipsec_update_esn_state(sa_entry); + + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); + + err = mlx5_ipsec_create_work(sa_entry); + if (err) + goto unblock_ipsec; + + err = mlx5e_ipsec_create_dwork(sa_entry); + if (err) + goto release_work; + + /* create hw context */ + err = mlx5_ipsec_create_sa_ctx(sa_entry); + if (err) + goto release_dwork; + + err = mlx5e_accel_ipsec_fs_add_rule(sa_entry); + if (err) + goto err_hw_ctx; + + if (x->props.mode == XFRM_MODE_TUNNEL && + x->xso.type == XFRM_DEV_OFFLOAD_PACKET && + !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings"); + err = -EINVAL; + goto err_add_rule; + } + + /* We use *_bh() variant because xfrm_timer_handler(), which runs + * in softirq context, can reach our state delete logic and we need + * xa_erase_bh() there. + */ + err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry, + GFP_KERNEL); + if (err) + goto err_add_rule; + + mlx5e_ipsec_set_esn_ops(sa_entry); + + if (sa_entry->dwork) + queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork, + MLX5_IPSEC_RESCHED); + + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && + x->props.mode == XFRM_MODE_TUNNEL) + xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, + MLX5E_IPSEC_TUNNEL_SA); + +out: + x->xso.offload_handle = (unsigned long)sa_entry; + return 0; + +err_add_rule: + mlx5e_accel_ipsec_fs_del_rule(sa_entry); +err_hw_ctx: + mlx5_ipsec_free_sa_ctx(sa_entry); +release_dwork: + kfree(sa_entry->dwork); +release_work: + if (sa_entry->work) + kfree(sa_entry->work->data); + kfree(sa_entry->work); +unblock_ipsec: + mlx5_eswitch_unblock_ipsec(priv->mdev); +err_xfrm: + kfree(sa_entry); + NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state"); + return err; +} + +static void mlx5e_xfrm_del_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5e_ipsec_sa_entry *old; + + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + return; + + old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id); + WARN_ON(old != sa_entry); + + if (attrs->mode == XFRM_MODE_TUNNEL && + attrs->type == XFRM_DEV_OFFLOAD_PACKET) + /* Make sure that no ARP requests are running in parallel */ + flush_workqueue(ipsec->wq); + +} + +static void mlx5e_xfrm_free_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + goto sa_entry_free; + + if (sa_entry->work) + cancel_work_sync(&sa_entry->work->work); + + if (sa_entry->dwork) + cancel_delayed_work_sync(&sa_entry->dwork->dwork); + + mlx5e_accel_ipsec_fs_del_rule(sa_entry); + mlx5_ipsec_free_sa_ctx(sa_entry); + kfree(sa_entry->dwork); + if (sa_entry->work) + kfree(sa_entry->work->data); + kfree(sa_entry->work); + mlx5_eswitch_unblock_ipsec(ipsec->mdev); +sa_entry_free: + kfree(sa_entry); +} + +static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs; + struct mlx5e_ipsec_netevent_data *data; + struct mlx5e_ipsec_sa_entry *sa_entry; + struct mlx5e_ipsec *ipsec; + struct neighbour *n = ptr; + struct net_device *netdev; + struct xfrm_state *x; + unsigned long idx; + + if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) + return NOTIFY_DONE; + + ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb); + xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) { + attrs = &sa_entry->attrs; + + if (attrs->family == AF_INET) { + if (!neigh_key_eq32(n, &attrs->saddr.a4) && + !neigh_key_eq32(n, &attrs->daddr.a4)) + continue; + } else { + if (!neigh_key_eq128(n, &attrs->saddr.a4) && + !neigh_key_eq128(n, &attrs->daddr.a4)) + continue; + } + + x = sa_entry->x; + netdev = x->xso.real_dev; + data = sa_entry->work->data; + + neigh_ha_snapshot(data->addr, n, netdev); + queue_work(ipsec->wq, &sa_entry->work->work); + } + + return NOTIFY_DONE; +} + +void mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec; + int ret = -ENOMEM; + + if (!mlx5_ipsec_device_caps(priv->mdev)) { + netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); + return; + } + + ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL); + if (!ipsec) + return; + + xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC); + ipsec->mdev = priv->mdev; + ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0, + priv->netdev->name); + if (!ipsec->wq) + goto err_wq; + + if (mlx5_ipsec_device_caps(priv->mdev) & + MLX5_IPSEC_CAP_PACKET_OFFLOAD) { + ret = mlx5e_ipsec_aso_init(ipsec); + if (ret) + goto err_aso; + } + + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) { + ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event; + ret = register_netevent_notifier(&ipsec->netevent_nb); + if (ret) + goto clear_aso; + } + + ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv); + ret = mlx5e_accel_ipsec_fs_init(ipsec); + if (ret) + goto err_fs_init; + + ipsec->fs = priv->fs; + priv->ipsec = ipsec; + netdev_dbg(priv->netdev, "IPSec attached to netdevice\n"); + return; + +err_fs_init: + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) + unregister_netevent_notifier(&ipsec->netevent_nb); +clear_aso: + if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) + mlx5e_ipsec_aso_cleanup(ipsec); +err_aso: + destroy_workqueue(ipsec->wq); +err_wq: + kfree(ipsec); + mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret); + return; +} + +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + + if (!ipsec) + return; + + mlx5e_accel_ipsec_fs_cleanup(ipsec); + if (ipsec->netevent_nb.notifier_call) { + unregister_netevent_notifier(&ipsec->netevent_nb); + ipsec->netevent_nb.notifier_call = NULL; + } + if (ipsec->aso) + mlx5e_ipsec_aso_cleanup(ipsec); + destroy_workqueue(ipsec->wq); + kfree(ipsec); + priv->ipsec = NULL; +} + +static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + if (x->props.family == AF_INET) { + /* Offload with IPv4 options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not support yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + + return true; +} + +static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5e_ipsec_work *work = sa_entry->work; + struct mlx5e_ipsec_sa_entry *sa_entry_shadow; + bool need_update; + + need_update = mlx5e_ipsec_update_esn_state(sa_entry); + if (!need_update) + return; + + sa_entry_shadow = work->data; + memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow)); + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs); + queue_work(sa_entry->ipsec->wq, &work->work); +} + +static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) +{ + struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); + struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; + u64 packets, bytes, lastuse; + + lockdep_assert(lockdep_is_held(&x->lock) || + lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex)); + + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + return; + + mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); + x->curlft.packets += packets; + x->curlft.bytes += bytes; +} + +static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, + struct xfrm_policy *x, + struct netlink_ext_ack *extack) +{ + struct xfrm_selector *sel = &x->selector; + + if (x->type != XFRM_POLICY_TYPE_MAIN) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types"); + return -EINVAL; + } + + /* Please pay attention that we support only one template */ + if (x->xfrm_nr > 1) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template"); + return -EINVAL; + } + + if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN && + x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy"); + return -EINVAL; + } + + if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP && + addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0"); + return -EINVAL; + } + + if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); + return -EINVAL; + } + + if (x->selector.proto != IPPROTO_IP && + x->selector.proto != IPPROTO_UDP && + x->selector.proto != IPPROTO_TCP) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP"); + return -EINVAL; + } + + if (x->priority) { + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority"); + return -EINVAL; + } + + if (x->priority == U32_MAX) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority"); + return -EINVAL; + } + } + + if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET && + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); + return -EINVAL; + } + + return 0; +} + +static void +mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, + struct mlx5_accel_pol_xfrm_attrs *attrs) +{ + struct xfrm_policy *x = pol_entry->x; + struct xfrm_selector *sel; + + sel = &x->selector; + memset(attrs, 0, sizeof(*attrs)); + + memcpy(&attrs->saddr, sel->saddr.a6, sizeof(attrs->saddr)); + memcpy(&attrs->daddr, sel->daddr.a6, sizeof(attrs->daddr)); + attrs->family = sel->family; + attrs->dir = x->xdo.dir; + attrs->action = x->action; + attrs->type = XFRM_DEV_OFFLOAD_PACKET; + attrs->reqid = x->xfrm_vec[0].reqid; + attrs->upspec.dport = ntohs(sel->dport); + attrs->upspec.dport_mask = ntohs(sel->dport_mask); + attrs->upspec.sport = ntohs(sel->sport); + attrs->upspec.sport_mask = ntohs(sel->sport_mask); + attrs->upspec.proto = sel->proto; + attrs->prio = x->priority; +} + +static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, + struct netlink_ext_ack *extack) +{ + struct net_device *netdev = x->xdo.real_dev; + struct mlx5e_ipsec_pol_entry *pol_entry; + struct mlx5e_priv *priv; + int err; + + priv = netdev_priv(netdev); + if (!priv->ipsec) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload"); + return -EOPNOTSUPP; + } + + err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack); + if (err) + return err; + + pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL); + if (!pol_entry) + return -ENOMEM; + + pol_entry->x = x; + pol_entry->ipsec = priv->ipsec; + + if (!mlx5_eswitch_block_ipsec(priv->mdev)) { + err = -EBUSY; + goto ipsec_busy; + } + + mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs); + err = mlx5e_accel_ipsec_fs_add_pol(pol_entry); + if (err) + goto err_fs; + + x->xdo.offload_handle = (unsigned long)pol_entry; + return 0; + +err_fs: + mlx5_eswitch_unblock_ipsec(priv->mdev); +ipsec_busy: + kfree(pol_entry); + NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); + return err; +} + +static void mlx5e_xfrm_del_policy(struct xfrm_policy *x) +{ + struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); + + mlx5e_accel_ipsec_fs_del_pol(pol_entry); + mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev); +} + +static void mlx5e_xfrm_free_policy(struct xfrm_policy *x) +{ + struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); + + kfree(pol_entry); +} + +static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { + .xdo_dev_state_add = mlx5e_xfrm_add_state, + .xdo_dev_state_delete = mlx5e_xfrm_del_state, + .xdo_dev_state_free = mlx5e_xfrm_free_state, + .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, + .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, + + .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, + .xdo_dev_policy_add = mlx5e_xfrm_add_policy, + .xdo_dev_policy_delete = mlx5e_xfrm_del_policy, + .xdo_dev_policy_free = mlx5e_xfrm_free_policy, +}; + +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *netdev = priv->netdev; + + if (!mlx5_ipsec_device_caps(mdev)) + return; + + mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); + + netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; + netdev->features |= NETIF_F_HW_ESP; + netdev->hw_enc_features |= NETIF_F_HW_ESP; + + if (!MLX5_CAP_ETH(mdev, swp_csum)) { + mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n"); + return; + } + + netdev->features |= NETIF_F_HW_ESP_TX_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; + + if (!MLX5_CAP_ETH(mdev, swp_lso)) { + mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); + return; + } + + netdev->gso_partial_features |= NETIF_F_GSO_ESP; + mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); + netdev->features |= NETIF_F_GSO_ESP; + netdev->hw_features |= NETIF_F_GSO_ESP; + netdev->hw_enc_features |= NETIF_F_GSO_ESP; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h new file mode 100644 index 0000000000..9e7c42c2f7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_H__ +#define __MLX5E_IPSEC_H__ + +#include +#include +#include +#include "lib/aso.h" + +#define MLX5E_IPSEC_SADB_RX_BITS 10 +#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L + +struct aes_gcm_keymat { + u64 seq_iv; + + u32 salt; + u32 icv_len; + + u32 key_len; + u32 aes_key[256 / 32]; +}; + +struct upspec { + u16 dport; + u16 dport_mask; + u16 sport; + u16 sport_mask; + u8 proto; +}; + +struct mlx5_ipsec_lft { + u64 hard_packet_limit; + u64 soft_packet_limit; + u64 numb_rounds_hard; + u64 numb_rounds_soft; +}; + +struct mlx5_replay_esn { + u32 replay_window; + u32 esn; + u32 esn_msb; + u8 overlap : 1; + u8 trigger : 1; +}; + +struct mlx5_accel_esp_xfrm_attrs { + u32 spi; + u32 mode; + struct aes_gcm_keymat aes_gcm; + + union { + __be32 a4; + __be32 a6[4]; + } saddr; + + union { + __be32 a4; + __be32 a6[4]; + } daddr; + + struct upspec upspec; + u8 dir : 2; + u8 type : 2; + u8 drop : 1; + u8 encap : 1; + u8 family; + struct mlx5_replay_esn replay_esn; + u32 authsize; + u32 reqid; + struct mlx5_ipsec_lft lft; + union { + u8 smac[ETH_ALEN]; + __be16 sport; + }; + union { + u8 dmac[ETH_ALEN]; + __be16 dport; + }; +}; + +enum mlx5_ipsec_cap { + MLX5_IPSEC_CAP_CRYPTO = 1 << 0, + MLX5_IPSEC_CAP_ESN = 1 << 1, + MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2, + MLX5_IPSEC_CAP_ROCE = 1 << 3, + MLX5_IPSEC_CAP_PRIO = 1 << 4, + MLX5_IPSEC_CAP_TUNNEL = 1 << 5, + MLX5_IPSEC_CAP_ESPINUDP = 1 << 6, +}; + +struct mlx5e_priv; + +struct mlx5e_ipsec_hw_stats { + u64 ipsec_rx_pkts; + u64 ipsec_rx_bytes; + u64 ipsec_rx_drop_pkts; + u64 ipsec_rx_drop_bytes; + u64 ipsec_tx_pkts; + u64 ipsec_tx_bytes; + u64 ipsec_tx_drop_pkts; + u64 ipsec_tx_drop_bytes; +}; + +struct mlx5e_ipsec_sw_stats { + atomic64_t ipsec_rx_drop_sp_alloc; + atomic64_t ipsec_rx_drop_sadb_miss; + atomic64_t ipsec_rx_drop_syndrome; + atomic64_t ipsec_tx_drop_bundle; + atomic64_t ipsec_tx_drop_no_state; + atomic64_t ipsec_tx_drop_not_ip; + atomic64_t ipsec_tx_drop_trailer; +}; + +struct mlx5e_ipsec_fc; +struct mlx5e_ipsec_tx; + +struct mlx5e_ipsec_work { + struct work_struct work; + struct mlx5e_ipsec_sa_entry *sa_entry; + void *data; +}; + +struct mlx5e_ipsec_netevent_data { + u8 addr[ETH_ALEN]; +}; + +struct mlx5e_ipsec_dwork { + struct delayed_work dwork; + struct mlx5e_ipsec_sa_entry *sa_entry; +}; + +struct mlx5e_ipsec_aso { + u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(ipsec_aso)]; + dma_addr_t dma_addr; + struct mlx5_aso *aso; + /* Protect ASO WQ access, as it is global to whole IPsec */ + spinlock_t lock; +}; + +struct mlx5e_ipsec_rx_create_attr { + struct mlx5_flow_namespace *ns; + struct mlx5_ttc_table *ttc; + u32 family; + int prio; + int pol_level; + int sa_level; + int status_level; + enum mlx5_flow_namespace_type chains_ns; +}; + +struct mlx5e_ipsec_ft { + struct mutex mutex; /* Protect changes to this struct */ + struct mlx5_flow_table *pol; + struct mlx5_flow_table *sa; + struct mlx5_flow_table *status; + u32 refcnt; +}; + +struct mlx5e_ipsec_rule { + struct mlx5_flow_handle *rule; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_fc *fc; +}; + +struct mlx5e_ipsec_miss { + struct mlx5_flow_group *group; + struct mlx5_flow_handle *rule; +}; + +struct mlx5e_ipsec_rx { + struct mlx5e_ipsec_ft ft; + struct mlx5e_ipsec_miss pol; + struct mlx5e_ipsec_miss sa; + struct mlx5e_ipsec_rule status; + struct mlx5e_ipsec_miss status_drop; + struct mlx5_fc *status_drop_cnt; + struct mlx5e_ipsec_fc *fc; + struct mlx5_fs_chains *chains; + u8 allow_tunnel_mode : 1; + struct xarray ipsec_obj_id_map; +}; + +struct mlx5e_ipsec_tx_create_attr { + int prio; + int pol_level; + int sa_level; + int cnt_level; + enum mlx5_flow_namespace_type chains_ns; +}; + +struct mlx5e_ipsec { + struct mlx5_core_dev *mdev; + struct xarray sadb; + struct mlx5e_ipsec_sw_stats sw_stats; + struct mlx5e_ipsec_hw_stats hw_stats; + struct workqueue_struct *wq; + struct mlx5e_flow_steering *fs; + struct mlx5e_ipsec_rx *rx_ipv4; + struct mlx5e_ipsec_rx *rx_ipv6; + struct mlx5e_ipsec_rx *rx_esw; + struct mlx5e_ipsec_tx *tx; + struct mlx5e_ipsec_tx *tx_esw; + struct mlx5e_ipsec_aso *aso; + struct notifier_block nb; + struct notifier_block netevent_nb; + struct mlx5_ipsec_fs *roce; + u8 is_uplink_rep: 1; +}; + +struct mlx5e_ipsec_esn_state { + u32 esn; + u32 esn_msb; + u8 overlap: 1; +}; + +struct mlx5e_ipsec_limits { + u64 round; + u8 soft_limit_hit : 1; + u8 fix_limit : 1; +}; + +struct mlx5e_ipsec_sa_entry { + struct mlx5e_ipsec_esn_state esn_state; + struct xfrm_state *x; + struct mlx5e_ipsec *ipsec; + struct mlx5_accel_esp_xfrm_attrs attrs; + void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); + u32 ipsec_obj_id; + u32 enc_key_id; + struct mlx5e_ipsec_rule ipsec_rule; + struct mlx5e_ipsec_work *work; + struct mlx5e_ipsec_dwork *dwork; + struct mlx5e_ipsec_limits limits; + u32 rx_mapped_id; +}; + +struct mlx5_accel_pol_xfrm_attrs { + union { + __be32 a4; + __be32 a6[4]; + } saddr; + + union { + __be32 a4; + __be32 a6[4]; + } daddr; + + struct upspec upspec; + u8 family; + u8 action; + u8 type : 2; + u8 dir : 2; + u32 reqid; + u32 prio; +}; + +struct mlx5e_ipsec_pol_entry { + struct xfrm_policy *x; + struct mlx5e_ipsec *ipsec; + struct mlx5e_ipsec_rule ipsec_rule; + struct mlx5_accel_pol_xfrm_attrs attrs; +}; + +#ifdef CONFIG_MLX5_EN_IPSEC + +void mlx5e_ipsec_init(struct mlx5e_priv *priv); +void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); +void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); + +void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec); +int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec); +int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry); +void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry); +int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry); +void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry); +void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry); +bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry); + +int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); +void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); + +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev); + +void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs); + +int mlx5e_ipsec_aso_init(struct mlx5e_ipsec *ipsec); +void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec); + +int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_wqe_aso_ctrl_seg *data); +void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, + void *ipsec_stats); + +void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_accel_esp_xfrm_attrs *attrs); +static inline struct mlx5_core_dev * +mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + return sa_entry->ipsec->mdev; +} + +static inline struct mlx5_core_dev * +mlx5e_ipsec_pol2dev(struct mlx5e_ipsec_pol_entry *pol_entry) +{ + return pol_entry->ipsec->mdev; +} + +static inline bool addr6_all_zero(__be32 *addr6) +{ + static const __be32 zaddr6[4] = {}; + + return !memcmp(addr6, zaddr6, sizeof(zaddr6)); +} +#else +static inline void mlx5e_ipsec_init(struct mlx5e_priv *priv) +{ +} + +static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) +{ +} + +static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) +{ +} + +static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return 0; +} +#endif + +#endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c new file mode 100644 index 0000000000..81e6aa6434 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -0,0 +1,2097 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include +#include "en.h" +#include "en/fs.h" +#include "eswitch.h" +#include "ipsec.h" +#include "fs_core.h" +#include "lib/ipsec_fs_roce.h" +#include "lib/fs_chains.h" +#include "esw/ipsec_fs.h" +#include "en_rep.h" + +#define NUM_IPSEC_FTE BIT(15) +#define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16 +#define IPSEC_TUNNEL_DEFAULT_TTL 0x40 + +struct mlx5e_ipsec_fc { + struct mlx5_fc *cnt; + struct mlx5_fc *drop; +}; + +struct mlx5e_ipsec_tx { + struct mlx5e_ipsec_ft ft; + struct mlx5e_ipsec_miss pol; + struct mlx5e_ipsec_miss sa; + struct mlx5e_ipsec_rule status; + struct mlx5_flow_namespace *ns; + struct mlx5e_ipsec_fc *fc; + struct mlx5_fs_chains *chains; + u8 allow_tunnel_mode : 1; +}; + +/* IPsec RX flow steering */ +static enum mlx5_traffic_types family2tt(u32 family) +{ + if (family == AF_INET) + return MLX5_TT_IPV4_IPSEC_ESP; + return MLX5_TT_IPV6_IPSEC_ESP; +} + +static struct mlx5e_ipsec_rx *ipsec_rx(struct mlx5e_ipsec *ipsec, u32 family, int type) +{ + if (ipsec->is_uplink_rep && type == XFRM_DEV_OFFLOAD_PACKET) + return ipsec->rx_esw; + + if (family == AF_INET) + return ipsec->rx_ipv4; + + return ipsec->rx_ipv6; +} + +static struct mlx5e_ipsec_tx *ipsec_tx(struct mlx5e_ipsec *ipsec, int type) +{ + if (ipsec->is_uplink_rep && type == XFRM_DEV_OFFLOAD_PACKET) + return ipsec->tx_esw; + + return ipsec->tx; +} + +static struct mlx5_fs_chains * +ipsec_chains_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *miss_ft, + enum mlx5_flow_namespace_type ns, int base_prio, + int base_level, struct mlx5_flow_table **root_ft) +{ + struct mlx5_chains_attr attr = {}; + struct mlx5_fs_chains *chains; + struct mlx5_flow_table *ft; + int err; + + attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + attr.max_grp_num = 2; + attr.default_ft = miss_ft; + attr.ns = ns; + attr.fs_base_prio = base_prio; + attr.fs_base_level = base_level; + chains = mlx5_chains_create(mdev, &attr); + if (IS_ERR(chains)) + return chains; + + /* Create chain 0, prio 1, level 0 to connect chains to prev in fs_core */ + ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_chains_get; + } + + *root_ft = ft; + return chains; + +err_chains_get: + mlx5_chains_destroy(chains); + return ERR_PTR(err); +} + +static void ipsec_chains_destroy(struct mlx5_fs_chains *chains) +{ + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_chains_destroy(chains); +} + +static struct mlx5_flow_table * +ipsec_chains_get_table(struct mlx5_fs_chains *chains, u32 prio) +{ + return mlx5_chains_get_table(chains, 0, prio + 1, 0); +} + +static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio) +{ + mlx5_chains_put_table(chains, 0, prio + 1, 0); +} + +static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, + int level, int prio, + int max_num_groups, u32 flags) +{ + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.max_num_groups = max_num_groups; + ft_attr.max_fte = NUM_IPSEC_FTE; + ft_attr.level = level; + ft_attr.prio = prio; + ft_attr.flags = flags; + + return mlx5_create_auto_grouped_flow_table(ns, &ft_attr); +} + +static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->status_drop.rule); + mlx5_destroy_flow_group(rx->status_drop.group); + mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); +} + +static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->status.rule); + + if (rx != ipsec->rx_esw) + return; + +#ifdef CONFIG_MLX5_ESWITCH + mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); +#endif +} + +static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; + struct mlx5_flow_spec *spec; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!flow_group_in || !spec) { + err = -ENOMEM; + goto err_out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop flow group, err=%d\n", err); + goto err_out; + } + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; + } + + rx->status_drop.group = g; + rx->status_drop.rule = rule; + rx->status_drop_cnt = flow_counter; + + kvfree(flow_group_in); + kvfree(spec); + return 0; + +err_rule: + mlx5_fc_destroy(mdev, flow_counter); +err_cnt: + mlx5_destroy_flow_group(g); +err_out: + kvfree(flow_group_in); + kvfree(spec); + return err; +} + +static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.ipsec_syndrome, 0); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(ipsec->mdev, + "Failed to add ipsec rx status pass rule, err=%d\n", err); + goto err_rule; + } + + rx->status.rule = rule; + kvfree(spec); + return 0; + +err_rule: + kvfree(spec); + return err; +} + +static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + ipsec_rx_status_pass_destroy(ipsec, rx); + ipsec_rx_status_drop_destroy(ipsec, rx); +} + +static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + int err; + + err = ipsec_rx_status_drop_create(ipsec, rx); + if (err) + return err; + + err = ipsec_rx_status_pass_create(ipsec, rx, dest); + if (err) + goto err_pass_create; + + return 0; + +err_pass_create: + ipsec_rx_status_drop_destroy(ipsec, rx); + return err; +} + +static int ipsec_miss_create(struct mlx5_core_dev *mdev, + struct mlx5_flow_table *ft, + struct mlx5e_ipsec_miss *miss, + struct mlx5_flow_destination *dest) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!flow_group_in || !spec) { + err = -ENOMEM; + goto out; + } + + /* Create miss_group */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); + miss->group = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(miss->group)) { + err = PTR_ERR(miss->group); + mlx5_core_err(mdev, "fail to create IPsec miss_group err=%d\n", + err); + goto out; + } + + /* Create miss rule */ + miss->rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(miss->rule)) { + mlx5_destroy_flow_group(miss->group); + err = PTR_ERR(miss->rule); + mlx5_core_err(mdev, "fail to create IPsec miss_rule err=%d\n", + err); + goto out; + } +out: + kvfree(flow_group_in); + kvfree(spec); + return err; +} + +static void ipsec_rx_ft_disconnect(struct mlx5e_ipsec *ipsec, u32 family) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); + + mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); +} + +static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) +{ + /* disconnect */ + if (rx != ipsec->rx_esw) + ipsec_rx_ft_disconnect(ipsec, family); + + if (rx->chains) { + ipsec_chains_destroy(rx->chains); + } else { + mlx5_del_flow_rules(rx->pol.rule); + mlx5_destroy_flow_group(rx->pol.group); + mlx5_destroy_flow_table(rx->ft.pol); + } + + mlx5_del_flow_rules(rx->sa.rule); + mlx5_destroy_flow_group(rx->sa.group); + mlx5_destroy_flow_table(rx->ft.sa); + if (rx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); + mlx5_ipsec_rx_status_destroy(ipsec, rx); + mlx5_destroy_flow_table(rx->ft.status); + + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); +} + +static void ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + u32 family, + struct mlx5e_ipsec_rx_create_attr *attr) +{ + if (rx == ipsec->rx_esw) { + /* For packet offload in switchdev mode, RX & TX use FDB namespace */ + attr->ns = ipsec->tx_esw->ns; + mlx5_esw_ipsec_rx_create_attr_set(ipsec, attr); + return; + } + + attr->ns = mlx5e_fs_get_ns(ipsec->fs, false); + attr->ttc = mlx5e_fs_get_ttc(ipsec->fs, false); + attr->family = family; + attr->prio = MLX5E_NIC_PRIO; + attr->pol_level = MLX5E_ACCEL_FS_POL_FT_LEVEL; + attr->sa_level = MLX5E_ACCEL_FS_ESP_FT_LEVEL; + attr->status_level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL; + attr->chains_ns = MLX5_FLOW_NAMESPACE_KERNEL; +} + +static int ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + int err; + + if (rx == ipsec->rx_esw) + return mlx5_esw_ipsec_rx_status_pass_dest_get(ipsec, dest); + + *dest = mlx5_ttc_get_default_dest(attr->ttc, family2tt(attr->family)); + err = mlx5_ipsec_fs_roce_rx_create(ipsec->mdev, ipsec->roce, attr->ns, dest, + attr->family, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, + attr->prio); + if (err) + return err; + + ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, attr->family); + if (ft) { + dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest->ft = ft; + } + + return 0; +} + +static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr) +{ + struct mlx5_flow_destination dest = {}; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx->ft.pol; + mlx5_ttc_fwd_dest(attr->ttc, family2tt(attr->family), &dest); +} + +static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) +{ + struct mlx5e_ipsec_rx_create_attr attr; + struct mlx5_flow_destination dest[2]; + struct mlx5_flow_table *ft; + u32 flags = 0; + int err; + + ipsec_rx_create_attr_set(ipsec, rx, family, &attr); + + err = ipsec_rx_status_pass_dest_get(ipsec, rx, &attr, &dest[0]); + if (err) + return err; + + ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_fs_ft_status; + } + rx->ft.status = ft; + + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); + err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); + if (err) + goto err_add; + + /* Create FT */ + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) + rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); + if (rx->allow_tunnel_mode) + flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft = ipsec_ft_create(attr.ns, attr.sa_level, attr.prio, 2, flags); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_fs_ft; + } + rx->ft.sa = ft; + + err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, dest); + if (err) + goto err_fs; + + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { + rx->chains = ipsec_chains_create(mdev, rx->ft.sa, + attr.chains_ns, + attr.prio, + attr.pol_level, + &rx->ft.pol); + if (IS_ERR(rx->chains)) { + err = PTR_ERR(rx->chains); + goto err_pol_ft; + } + + goto connect; + } + + ft = ipsec_ft_create(attr.ns, attr.pol_level, attr.prio, 2, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_pol_ft; + } + rx->ft.pol = ft; + memset(dest, 0x00, 2 * sizeof(*dest)); + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = rx->ft.sa; + err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, dest); + if (err) + goto err_pol_miss; + +connect: + /* connect */ + if (rx != ipsec->rx_esw) + ipsec_rx_ft_connect(ipsec, rx, &attr); + return 0; + +err_pol_miss: + mlx5_destroy_flow_table(rx->ft.pol); +err_pol_ft: + mlx5_del_flow_rules(rx->sa.rule); + mlx5_destroy_flow_group(rx->sa.group); +err_fs: + mlx5_destroy_flow_table(rx->ft.sa); +err_fs_ft: + if (rx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); + mlx5_del_flow_rules(rx->status.rule); + mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); +err_add: + mlx5_destroy_flow_table(rx->ft.status); +err_fs_ft_status: + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); + return err; +} + +static int rx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) +{ + int err; + + if (rx->ft.refcnt) + goto skip; + + err = mlx5_eswitch_block_mode(mdev); + if (err) + return err; + + err = rx_create(mdev, ipsec, rx, family); + if (err) { + mlx5_eswitch_unblock_mode(mdev); + return err; + } + +skip: + rx->ft.refcnt++; + return 0; +} + +static void rx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, + u32 family) +{ + if (--rx->ft.refcnt) + return; + + rx_destroy(ipsec->mdev, ipsec, rx, family); + mlx5_eswitch_unblock_mode(ipsec->mdev); +} + +static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec *ipsec, u32 family, + int type) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, type); + int err; + + mutex_lock(&rx->ft.mutex); + err = rx_get(mdev, ipsec, rx, family); + mutex_unlock(&rx->ft.mutex); + if (err) + return ERR_PTR(err); + + return rx; +} + +static struct mlx5_flow_table *rx_ft_get_policy(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec *ipsec, + u32 family, u32 prio, int type) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, type); + struct mlx5_flow_table *ft; + int err; + + mutex_lock(&rx->ft.mutex); + err = rx_get(mdev, ipsec, rx, family); + if (err) + goto err_get; + + ft = rx->chains ? ipsec_chains_get_table(rx->chains, prio) : rx->ft.pol; + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_get_ft; + } + + mutex_unlock(&rx->ft.mutex); + return ft; + +err_get_ft: + rx_put(ipsec, rx, family); +err_get: + mutex_unlock(&rx->ft.mutex); + return ERR_PTR(err); +} + +static void rx_ft_put(struct mlx5e_ipsec *ipsec, u32 family, int type) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, type); + + mutex_lock(&rx->ft.mutex); + rx_put(ipsec, rx, family); + mutex_unlock(&rx->ft.mutex); +} + +static void rx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 family, u32 prio, int type) +{ + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, type); + + mutex_lock(&rx->ft.mutex); + if (rx->chains) + ipsec_chains_put_table(rx->chains, prio); + + rx_put(ipsec, rx, family); + mutex_unlock(&rx->ft.mutex); +} + +static int ipsec_counter_rule_tx(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *fte; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* create fte */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(tx->fc->cnt); + fte = mlx5_add_flow_rules(tx->ft.status, spec, &flow_act, &dest, 1); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + mlx5_core_err(mdev, "Fail to add ipsec tx counter rule err=%d\n", err); + goto err_rule; + } + + kvfree(spec); + tx->status.rule = fte; + return 0; + +err_rule: + kvfree(spec); + return err; +} + +/* IPsec TX flow steering */ +static void tx_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, + struct mlx5_ipsec_fs *roce) +{ + mlx5_ipsec_fs_roce_tx_destroy(roce); + if (tx->chains) { + ipsec_chains_destroy(tx->chains); + } else { + mlx5_del_flow_rules(tx->pol.rule); + mlx5_destroy_flow_group(tx->pol.group); + mlx5_destroy_flow_table(tx->ft.pol); + } + + if (tx == ipsec->tx_esw) { + mlx5_del_flow_rules(tx->sa.rule); + mlx5_destroy_flow_group(tx->sa.group); + } + mlx5_destroy_flow_table(tx->ft.sa); + if (tx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(ipsec->mdev); + mlx5_del_flow_rules(tx->status.rule); + mlx5_destroy_flow_table(tx->ft.status); +} + +static void ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx *tx, + struct mlx5e_ipsec_tx_create_attr *attr) +{ + if (tx == ipsec->tx_esw) { + mlx5_esw_ipsec_tx_create_attr_set(ipsec, attr); + return; + } + + attr->prio = 0; + attr->pol_level = 0; + attr->sa_level = 1; + attr->cnt_level = 2; + attr->chains_ns = MLX5_FLOW_NAMESPACE_EGRESS_IPSEC; +} + +static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, + struct mlx5_ipsec_fs *roce) +{ + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5e_ipsec_tx_create_attr attr; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft; + u32 flags = 0; + int err; + + ipsec_tx_create_attr_set(ipsec, tx, &attr); + ft = ipsec_ft_create(tx->ns, attr.cnt_level, attr.prio, 1, 0); + if (IS_ERR(ft)) + return PTR_ERR(ft); + tx->ft.status = ft; + + err = ipsec_counter_rule_tx(mdev, tx); + if (err) + goto err_status_rule; + + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) + tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); + if (tx->allow_tunnel_mode) + flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft = ipsec_ft_create(tx->ns, attr.sa_level, attr.prio, 4, flags); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_sa_ft; + } + tx->ft.sa = ft; + + if (tx == ipsec->tx_esw) { + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = MLX5_VPORT_UPLINK; + err = ipsec_miss_create(mdev, tx->ft.sa, &tx->sa, &dest); + if (err) + goto err_sa_miss; + memset(&dest, 0, sizeof(dest)); + } + + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { + tx->chains = ipsec_chains_create( + mdev, tx->ft.sa, attr.chains_ns, attr.prio, attr.pol_level, + &tx->ft.pol); + if (IS_ERR(tx->chains)) { + err = PTR_ERR(tx->chains); + goto err_pol_ft; + } + + goto connect_roce; + } + + ft = ipsec_ft_create(tx->ns, attr.pol_level, attr.prio, 2, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_pol_ft; + } + tx->ft.pol = ft; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = tx->ft.sa; + err = ipsec_miss_create(mdev, tx->ft.pol, &tx->pol, &dest); + if (err) { + mlx5_destroy_flow_table(tx->ft.pol); + goto err_pol_ft; + } + +connect_roce: + err = mlx5_ipsec_fs_roce_tx_create(mdev, roce, tx->ft.pol); + if (err) + goto err_roce; + return 0; + +err_roce: + if (tx->chains) { + ipsec_chains_destroy(tx->chains); + } else { + mlx5_del_flow_rules(tx->pol.rule); + mlx5_destroy_flow_group(tx->pol.group); + mlx5_destroy_flow_table(tx->ft.pol); + } +err_pol_ft: + if (tx == ipsec->tx_esw) { + mlx5_del_flow_rules(tx->sa.rule); + mlx5_destroy_flow_group(tx->sa.group); + } +err_sa_miss: + mlx5_destroy_flow_table(tx->ft.sa); +err_sa_ft: + if (tx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); + mlx5_del_flow_rules(tx->status.rule); +err_status_rule: + mlx5_destroy_flow_table(tx->ft.status); + return err; +} + +static void ipsec_esw_tx_ft_policy_set(struct mlx5_core_dev *mdev, + struct mlx5_flow_table *ft) +{ +#ifdef CONFIG_MLX5_ESWITCH + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_priv *priv; + + esw->offloads.ft_ipsec_tx_pol = ft; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + priv = netdev_priv(uplink_rpriv->netdev); + if (!priv->channels.num) + return; + + mlx5e_rep_deactivate_channels(priv); + mlx5e_rep_activate_channels(priv); +#endif +} + +static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx *tx) +{ + int err; + + if (tx->ft.refcnt) + goto skip; + + err = mlx5_eswitch_block_mode(mdev); + if (err) + return err; + + err = tx_create(ipsec, tx, ipsec->roce); + if (err) { + mlx5_eswitch_unblock_mode(mdev); + return err; + } + + if (tx == ipsec->tx_esw) + ipsec_esw_tx_ft_policy_set(mdev, tx->ft.pol); + +skip: + tx->ft.refcnt++; + return 0; +} + +static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx) +{ + if (--tx->ft.refcnt) + return; + + if (tx == ipsec->tx_esw) { + mlx5_esw_ipsec_restore_dest_uplink(ipsec->mdev); + ipsec_esw_tx_ft_policy_set(ipsec->mdev, NULL); + } + + tx_destroy(ipsec, tx, ipsec->roce); + mlx5_eswitch_unblock_mode(ipsec->mdev); +} + +static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec *ipsec, + u32 prio, int type) +{ + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, type); + struct mlx5_flow_table *ft; + int err; + + mutex_lock(&tx->ft.mutex); + err = tx_get(mdev, ipsec, tx); + if (err) + goto err_get; + + ft = tx->chains ? ipsec_chains_get_table(tx->chains, prio) : tx->ft.pol; + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_get_ft; + } + + mutex_unlock(&tx->ft.mutex); + return ft; + +err_get_ft: + tx_put(ipsec, tx); +err_get: + mutex_unlock(&tx->ft.mutex); + return ERR_PTR(err); +} + +static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec *ipsec, int type) +{ + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, type); + int err; + + mutex_lock(&tx->ft.mutex); + err = tx_get(mdev, ipsec, tx); + mutex_unlock(&tx->ft.mutex); + if (err) + return ERR_PTR(err); + + return tx; +} + +static void tx_ft_put(struct mlx5e_ipsec *ipsec, int type) +{ + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, type); + + mutex_lock(&tx->ft.mutex); + tx_put(ipsec, tx); + mutex_unlock(&tx->ft.mutex); +} + +static void tx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 prio, int type) +{ + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, type); + + mutex_lock(&tx->ft.mutex); + if (tx->chains) + ipsec_chains_put_table(tx->chains, prio); + + tx_put(ipsec, tx); + mutex_unlock(&tx->ft.mutex); +} + +static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr, + __be32 *daddr) +{ + if (!*saddr && !*daddr) + return; + + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 4); + + if (*saddr) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + } + + if (*daddr) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } +} + +static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr, + __be32 *daddr) +{ + if (addr6_all_zero(saddr) && addr6_all_zero(daddr)) + return; + + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 6); + + if (!addr6_all_zero(saddr)) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16); + } + + if (!addr6_all_zero(daddr)) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16); + } +} + +static void setup_fte_esp(struct mlx5_flow_spec *spec) +{ + /* ESP header */ + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP); +} + +static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi, bool encap) +{ + /* SPI number */ + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + if (encap) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.inner_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.inner_esp_spi, spi); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.outer_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.outer_esp_spi, spi); + } +} + +static void setup_fte_no_frags(struct mlx5_flow_spec *spec) +{ + /* Non fragmented */ + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.frag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.frag, 0); +} + +static void setup_fte_reg_a(struct mlx5_flow_spec *spec) +{ + /* Add IPsec indicator in metadata_reg_a */ + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_IPSEC); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_IPSEC); +} + +static void setup_fte_reg_c4(struct mlx5_flow_spec *spec, u32 reqid) +{ + /* Pass policy check before choosing this SA */ + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_4, reqid); +} + +static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upspec *upspec) +{ + switch (upspec->proto) { + case IPPROTO_UDP: + if (upspec->dport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, + udp_dport, upspec->dport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, + udp_dport, upspec->dport); + } + if (upspec->sport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, + udp_sport, upspec->sport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, + udp_sport, upspec->sport); + } + break; + case IPPROTO_TCP: + if (upspec->dport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, + tcp_dport, upspec->dport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, + tcp_dport, upspec->dport); + } + if (upspec->sport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, + tcp_sport, upspec->sport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, + tcp_sport, upspec->sport); + } + break; + default: + return; + } + + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, spec->match_criteria, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, ip_protocol, upspec->proto); +} + +static enum mlx5_flow_namespace_type ipsec_fs_get_ns(struct mlx5e_ipsec *ipsec, + int type, u8 dir) +{ + if (ipsec->is_uplink_rep && type == XFRM_DEV_OFFLOAD_PACKET) + return MLX5_FLOW_NAMESPACE_FDB; + + if (dir == XFRM_DEV_OFFLOAD_IN) + return MLX5_FLOW_NAMESPACE_KERNEL; + + return MLX5_FLOW_NAMESPACE_EGRESS; +} + +static int setup_modify_header(struct mlx5e_ipsec *ipsec, int type, u32 val, u8 dir, + struct mlx5_flow_act *flow_act) +{ + enum mlx5_flow_namespace_type ns_type = ipsec_fs_get_ns(ipsec, type, dir); + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_modify_hdr *modify_hdr; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + switch (dir) { + case XFRM_DEV_OFFLOAD_IN: + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_B); + break; + case XFRM_DEV_OFFLOAD_OUT: + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_4); + break; + default: + return -EINVAL; + } + + MLX5_SET(set_action_in, action, data, val); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(mdev, ns_type, 1, action); + if (IS_ERR(modify_hdr)) { + mlx5_core_err(mdev, "Failed to allocate modify_header %ld\n", + PTR_ERR(modify_hdr)); + return PTR_ERR(modify_hdr); + } + + flow_act->modify_hdr = modify_hdr; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + return 0; +} + +static int +setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm_attrs *attrs, + struct mlx5_pkt_reformat_params *reformat_params) +{ + struct ip_esp_hdr *esp_hdr; + struct ipv6hdr *ipv6hdr; + struct ethhdr *eth_hdr; + struct iphdr *iphdr; + char *reformatbf; + size_t bfflen; + void *hdr; + + bfflen = sizeof(*eth_hdr); + + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) { + bfflen += sizeof(*esp_hdr) + 8; + + switch (attrs->family) { + case AF_INET: + bfflen += sizeof(*iphdr); + break; + case AF_INET6: + bfflen += sizeof(*ipv6hdr); + break; + default: + return -EINVAL; + } + } + + reformatbf = kzalloc(bfflen, GFP_KERNEL); + if (!reformatbf) + return -ENOMEM; + + eth_hdr = (struct ethhdr *)reformatbf; + switch (attrs->family) { + case AF_INET: + eth_hdr->h_proto = htons(ETH_P_IP); + break; + case AF_INET6: + eth_hdr->h_proto = htons(ETH_P_IPV6); + break; + default: + goto free_reformatbf; + } + + ether_addr_copy(eth_hdr->h_dest, attrs->dmac); + ether_addr_copy(eth_hdr->h_source, attrs->smac); + + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + reformat_params->type = MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2; + break; + case XFRM_DEV_OFFLOAD_OUT: + reformat_params->type = MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL; + reformat_params->param_0 = attrs->authsize; + + hdr = reformatbf + sizeof(*eth_hdr); + switch (attrs->family) { + case AF_INET: + iphdr = (struct iphdr *)hdr; + memcpy(&iphdr->saddr, &attrs->saddr.a4, 4); + memcpy(&iphdr->daddr, &attrs->daddr.a4, 4); + iphdr->version = 4; + iphdr->ihl = 5; + iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL; + iphdr->protocol = IPPROTO_ESP; + hdr += sizeof(*iphdr); + break; + case AF_INET6: + ipv6hdr = (struct ipv6hdr *)hdr; + memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16); + memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16); + ipv6hdr->nexthdr = IPPROTO_ESP; + ipv6hdr->version = 6; + ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL; + hdr += sizeof(*ipv6hdr); + break; + default: + goto free_reformatbf; + } + + esp_hdr = (struct ip_esp_hdr *)hdr; + esp_hdr->spi = htonl(attrs->spi); + break; + default: + goto free_reformatbf; + } + + reformat_params->size = bfflen; + reformat_params->data = reformatbf; + return 0; + +free_reformatbf: + kfree(reformatbf); + return -EINVAL; +} + +static int get_reformat_type(struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + if (attrs->encap) + return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP; + return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT; + case XFRM_DEV_OFFLOAD_OUT: + if (attrs->family == AF_INET) { + if (attrs->encap) + return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4; + return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4; + } + + if (attrs->encap) + return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6; + return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6; + default: + WARN_ON(true); + } + + return -EINVAL; +} + +static int +setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs, + struct mlx5_pkt_reformat_params *reformat_params) +{ + struct udphdr *udphdr; + char *reformatbf; + size_t bfflen; + __be32 spi; + void *hdr; + + reformat_params->type = get_reformat_type(attrs); + if (reformat_params->type < 0) + return reformat_params->type; + + switch (attrs->dir) { + case XFRM_DEV_OFFLOAD_IN: + break; + case XFRM_DEV_OFFLOAD_OUT: + bfflen = MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE; + if (attrs->encap) + bfflen += sizeof(*udphdr); + + reformatbf = kzalloc(bfflen, GFP_KERNEL); + if (!reformatbf) + return -ENOMEM; + + hdr = reformatbf; + if (attrs->encap) { + udphdr = (struct udphdr *)reformatbf; + udphdr->source = attrs->sport; + udphdr->dest = attrs->dport; + hdr += sizeof(*udphdr); + } + + /* convert to network format */ + spi = htonl(attrs->spi); + memcpy(hdr, &spi, sizeof(spi)); + + reformat_params->param_0 = attrs->authsize; + reformat_params->size = bfflen; + reformat_params->data = reformatbf; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int setup_pkt_reformat(struct mlx5e_ipsec *ipsec, + struct mlx5_accel_esp_xfrm_attrs *attrs, + struct mlx5_flow_act *flow_act) +{ + enum mlx5_flow_namespace_type ns_type = ipsec_fs_get_ns(ipsec, attrs->type, + attrs->dir); + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_pkt_reformat *pkt_reformat; + int ret; + + switch (attrs->mode) { + case XFRM_MODE_TRANSPORT: + ret = setup_pkt_transport_reformat(attrs, &reformat_params); + break; + case XFRM_MODE_TUNNEL: + ret = setup_pkt_tunnel_reformat(mdev, attrs, &reformat_params); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + pkt_reformat = + mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type); + kfree(reformat_params.data); + if (IS_ERR(pkt_reformat)) + return PTR_ERR(pkt_reformat); + + flow_act->pkt_reformat = pkt_reformat; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + return 0; +} + +static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_flow_destination dest[2]; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5e_ipsec_rx *rx; + struct mlx5_fc *counter; + int err = 0; + + rx = rx_ft_get(mdev, ipsec, attrs->family, attrs->type); + if (IS_ERR(rx)) + return PTR_ERR(rx); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto err_alloc; + } + + if (attrs->family == AF_INET) + setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + + setup_fte_spi(spec, attrs->spi, attrs->encap); + if (!attrs->encap) + setup_fte_esp(spec); + setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); + + if (rx != ipsec->rx_esw) + err = setup_modify_header(ipsec, attrs->type, + sa_entry->ipsec_obj_id | BIT(31), + XFRM_DEV_OFFLOAD_IN, &flow_act); + else + err = mlx5_esw_ipsec_rx_setup_modify_header(sa_entry, &flow_act); + + if (err) + goto err_mod_header; + + switch (attrs->type) { + case XFRM_DEV_OFFLOAD_PACKET: + err = setup_pkt_reformat(ipsec, attrs, &flow_act); + if (err) + goto err_pkt_reformat; + break; + default: + break; + } + + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_add_cnt; + } + flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC; + flow_act.crypto.obj_id = sa_entry->ipsec_obj_id; + flow_act.flags |= FLOW_ACT_NO_APPEND; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (attrs->drop) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + else + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = rx->ft.status; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(counter); + rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err); + goto err_add_flow; + } + kvfree(spec); + + sa_entry->ipsec_rule.rule = rule; + sa_entry->ipsec_rule.modify_hdr = flow_act.modify_hdr; + sa_entry->ipsec_rule.fc = counter; + sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat; + return 0; + +err_add_flow: + mlx5_fc_destroy(mdev, counter); +err_add_cnt: + if (flow_act.pkt_reformat) + mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat); +err_pkt_reformat: + mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr); +err_mod_header: + kvfree(spec); +err_alloc: + rx_ft_put(ipsec, attrs->family, attrs->type); + return err; +} + +static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_flow_destination dest[2]; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5e_ipsec_tx *tx; + struct mlx5_fc *counter; + int err; + + tx = tx_ft_get(mdev, ipsec, attrs->type); + if (IS_ERR(tx)) + return PTR_ERR(tx); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto err_alloc; + } + + if (attrs->family == AF_INET) + setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + + setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); + + switch (attrs->type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + setup_fte_spi(spec, attrs->spi, false); + setup_fte_esp(spec); + setup_fte_reg_a(spec); + break; + case XFRM_DEV_OFFLOAD_PACKET: + if (attrs->reqid) + setup_fte_reg_c4(spec, attrs->reqid); + err = setup_pkt_reformat(ipsec, attrs, &flow_act); + if (err) + goto err_pkt_reformat; + break; + default: + break; + } + + counter = mlx5_fc_create(mdev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_add_cnt; + } + + flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC; + flow_act.crypto.obj_id = sa_entry->ipsec_obj_id; + flow_act.flags |= FLOW_ACT_NO_APPEND; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (attrs->drop) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + else + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + dest[0].ft = tx->ft.status; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(counter); + rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err); + goto err_add_flow; + } + + kvfree(spec); + sa_entry->ipsec_rule.rule = rule; + sa_entry->ipsec_rule.fc = counter; + sa_entry->ipsec_rule.pkt_reformat = flow_act.pkt_reformat; + return 0; + +err_add_flow: + mlx5_fc_destroy(mdev, counter); +err_add_cnt: + if (flow_act.pkt_reformat) + mlx5_packet_reformat_dealloc(mdev, flow_act.pkt_reformat); +err_pkt_reformat: + kvfree(spec); +err_alloc: + tx_ft_put(ipsec, attrs->type); + return err; +} + +static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) +{ + struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs; + struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry); + struct mlx5e_ipsec *ipsec = pol_entry->ipsec; + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + struct mlx5e_ipsec_tx *tx; + int err, dstn = 0; + + ft = tx_ft_get_policy(mdev, ipsec, attrs->prio, attrs->type); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto err_alloc; + } + + tx = ipsec_tx(ipsec, attrs->type); + if (attrs->family == AF_INET) + setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + + setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); + + switch (attrs->action) { + case XFRM_POLICY_ALLOW: + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + if (!attrs->reqid) + break; + + err = setup_modify_header(ipsec, attrs->type, attrs->reqid, + XFRM_DEV_OFFLOAD_OUT, &flow_act); + if (err) + goto err_mod_header; + break; + case XFRM_POLICY_BLOCK: + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[dstn].counter_id = mlx5_fc_id(tx->fc->drop); + dstn++; + break; + default: + WARN_ON(true); + err = -EINVAL; + goto err_mod_header; + } + + flow_act.flags |= FLOW_ACT_NO_APPEND; + if (tx == ipsec->tx_esw && tx->chains) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[dstn].ft = tx->ft.sa; + dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dstn++; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "fail to add TX ipsec rule err=%d\n", err); + goto err_action; + } + + kvfree(spec); + pol_entry->ipsec_rule.rule = rule; + pol_entry->ipsec_rule.modify_hdr = flow_act.modify_hdr; + return 0; + +err_action: + if (flow_act.modify_hdr) + mlx5_modify_header_dealloc(mdev, flow_act.modify_hdr); +err_mod_header: + kvfree(spec); +err_alloc: + tx_ft_put_policy(ipsec, attrs->prio, attrs->type); + return err; +} + +static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) +{ + struct mlx5_accel_pol_xfrm_attrs *attrs = &pol_entry->attrs; + struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry); + struct mlx5e_ipsec *ipsec = pol_entry->ipsec; + struct mlx5_flow_destination dest[2]; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + struct mlx5e_ipsec_rx *rx; + int err, dstn = 0; + + ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->family, attrs->prio, + attrs->type); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + rx = ipsec_rx(pol_entry->ipsec, attrs->family, attrs->type); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto err_alloc; + } + + if (attrs->family == AF_INET) + setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + + setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); + + switch (attrs->action) { + case XFRM_POLICY_ALLOW: + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + break; + case XFRM_POLICY_BLOCK: + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[dstn].counter_id = mlx5_fc_id(rx->fc->drop); + dstn++; + break; + default: + WARN_ON(true); + err = -EINVAL; + goto err_action; + } + + flow_act.flags |= FLOW_ACT_NO_APPEND; + if (rx == ipsec->rx_esw && rx->chains) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dstn].ft = rx->ft.sa; + dstn++; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add RX IPsec policy rule err=%d\n", err); + goto err_action; + } + + kvfree(spec); + pol_entry->ipsec_rule.rule = rule; + return 0; + +err_action: + kvfree(spec); +err_alloc: + rx_ft_put_policy(pol_entry->ipsec, attrs->family, attrs->prio, attrs->type); + return err; +} + +static void ipsec_fs_destroy_single_counter(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec_fc *fc) +{ + mlx5_fc_destroy(mdev, fc->drop); + mlx5_fc_destroy(mdev, fc->cnt); + kfree(fc); +} + +static void ipsec_fs_destroy_counters(struct mlx5e_ipsec *ipsec) +{ + struct mlx5_core_dev *mdev = ipsec->mdev; + + ipsec_fs_destroy_single_counter(mdev, ipsec->tx->fc); + ipsec_fs_destroy_single_counter(mdev, ipsec->rx_ipv4->fc); + if (ipsec->is_uplink_rep) { + ipsec_fs_destroy_single_counter(mdev, ipsec->tx_esw->fc); + ipsec_fs_destroy_single_counter(mdev, ipsec->rx_esw->fc); + } +} + +static struct mlx5e_ipsec_fc *ipsec_fs_init_single_counter(struct mlx5_core_dev *mdev) +{ + struct mlx5e_ipsec_fc *fc; + struct mlx5_fc *counter; + int err; + + fc = kzalloc(sizeof(*fc), GFP_KERNEL); + if (!fc) + return ERR_PTR(-ENOMEM); + + counter = mlx5_fc_create(mdev, false); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_cnt; + } + fc->cnt = counter; + + counter = mlx5_fc_create(mdev, false); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_drop; + } + fc->drop = counter; + + return fc; + +err_drop: + mlx5_fc_destroy(mdev, fc->cnt); +err_cnt: + kfree(fc); + return ERR_PTR(err); +} + +static int ipsec_fs_init_counters(struct mlx5e_ipsec *ipsec) +{ + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5e_ipsec_fc *fc; + int err; + + fc = ipsec_fs_init_single_counter(mdev); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + goto err_rx_cnt; + } + ipsec->rx_ipv4->fc = fc; + + fc = ipsec_fs_init_single_counter(mdev); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + goto err_tx_cnt; + } + ipsec->tx->fc = fc; + + if (ipsec->is_uplink_rep) { + fc = ipsec_fs_init_single_counter(mdev); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + goto err_rx_esw_cnt; + } + ipsec->rx_esw->fc = fc; + + fc = ipsec_fs_init_single_counter(mdev); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + goto err_tx_esw_cnt; + } + ipsec->tx_esw->fc = fc; + } + + /* Both IPv4 and IPv6 point to same flow counters struct. */ + ipsec->rx_ipv6->fc = ipsec->rx_ipv4->fc; + return 0; + +err_tx_esw_cnt: + ipsec_fs_destroy_single_counter(mdev, ipsec->rx_esw->fc); +err_rx_esw_cnt: + ipsec_fs_destroy_single_counter(mdev, ipsec->tx->fc); +err_tx_cnt: + ipsec_fs_destroy_single_counter(mdev, ipsec->rx_ipv4->fc); +err_rx_cnt: + return err; +} + +void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_ipsec *ipsec = priv->ipsec; + struct mlx5e_ipsec_hw_stats *stats; + struct mlx5e_ipsec_fc *fc; + u64 packets, bytes; + + stats = (struct mlx5e_ipsec_hw_stats *)ipsec_stats; + + stats->ipsec_rx_pkts = 0; + stats->ipsec_rx_bytes = 0; + stats->ipsec_rx_drop_pkts = 0; + stats->ipsec_rx_drop_bytes = 0; + stats->ipsec_tx_pkts = 0; + stats->ipsec_tx_bytes = 0; + stats->ipsec_tx_drop_pkts = 0; + stats->ipsec_tx_drop_bytes = 0; + + fc = ipsec->rx_ipv4->fc; + mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_rx_pkts, &stats->ipsec_rx_bytes); + mlx5_fc_query(mdev, fc->drop, &stats->ipsec_rx_drop_pkts, + &stats->ipsec_rx_drop_bytes); + + fc = ipsec->tx->fc; + mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_tx_pkts, &stats->ipsec_tx_bytes); + mlx5_fc_query(mdev, fc->drop, &stats->ipsec_tx_drop_pkts, + &stats->ipsec_tx_drop_bytes); + + if (ipsec->is_uplink_rep) { + fc = ipsec->rx_esw->fc; + if (!mlx5_fc_query(mdev, fc->cnt, &packets, &bytes)) { + stats->ipsec_rx_pkts += packets; + stats->ipsec_rx_bytes += bytes; + } + + if (!mlx5_fc_query(mdev, fc->drop, &packets, &bytes)) { + stats->ipsec_rx_drop_pkts += packets; + stats->ipsec_rx_drop_bytes += bytes; + } + + fc = ipsec->tx_esw->fc; + if (!mlx5_fc_query(mdev, fc->cnt, &packets, &bytes)) { + stats->ipsec_tx_pkts += packets; + stats->ipsec_tx_bytes += bytes; + } + + if (!mlx5_fc_query(mdev, fc->drop, &packets, &bytes)) { + stats->ipsec_tx_drop_pkts += packets; + stats->ipsec_tx_drop_bytes += bytes; + } + } +} + +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int err = 0; + + if (esw) { + err = mlx5_esw_lock(esw); + if (err) + return err; + } + + if (mdev->num_block_ipsec) { + err = -EBUSY; + goto unlock; + } + + mdev->num_block_tc++; + +unlock: + if (esw) + mlx5_esw_unlock(esw); + + return err; +} +#else +static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) +{ + if (mdev->num_block_ipsec) + return -EBUSY; + + mdev->num_block_tc++; + return 0; +} +#endif + +static void mlx5e_ipsec_unblock_tc_offload(struct mlx5_core_dev *mdev) +{ + mdev->num_block_tc--; +} + +int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + int err; + + if (sa_entry->attrs.type == XFRM_DEV_OFFLOAD_PACKET) { + err = mlx5e_ipsec_block_tc_offload(sa_entry->ipsec->mdev); + if (err) + return err; + } + + if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) + err = tx_add_rule(sa_entry); + else + err = rx_add_rule(sa_entry); + + if (err) + goto err_out; + + return 0; + +err_out: + if (sa_entry->attrs.type == XFRM_DEV_OFFLOAD_PACKET) + mlx5e_ipsec_unblock_tc_offload(sa_entry->ipsec->mdev); + return err; +} + +void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + + mlx5_del_flow_rules(ipsec_rule->rule); + mlx5_fc_destroy(mdev, ipsec_rule->fc); + if (ipsec_rule->pkt_reformat) + mlx5_packet_reformat_dealloc(mdev, ipsec_rule->pkt_reformat); + + if (sa_entry->attrs.type == XFRM_DEV_OFFLOAD_PACKET) + mlx5e_ipsec_unblock_tc_offload(mdev); + + if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) { + tx_ft_put(sa_entry->ipsec, sa_entry->attrs.type); + return; + } + + mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); + mlx5_esw_ipsec_rx_id_mapping_remove(sa_entry); + rx_ft_put(sa_entry->ipsec, sa_entry->attrs.family, sa_entry->attrs.type); +} + +int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry) +{ + int err; + + err = mlx5e_ipsec_block_tc_offload(pol_entry->ipsec->mdev); + if (err) + return err; + + if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) + err = tx_add_policy(pol_entry); + else + err = rx_add_policy(pol_entry); + + if (err) + goto err_out; + + return 0; + +err_out: + mlx5e_ipsec_unblock_tc_offload(pol_entry->ipsec->mdev); + return err; +} + +void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry) +{ + struct mlx5e_ipsec_rule *ipsec_rule = &pol_entry->ipsec_rule; + struct mlx5_core_dev *mdev = mlx5e_ipsec_pol2dev(pol_entry); + + mlx5_del_flow_rules(ipsec_rule->rule); + + mlx5e_ipsec_unblock_tc_offload(pol_entry->ipsec->mdev); + + if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { + rx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.family, + pol_entry->attrs.prio, pol_entry->attrs.type); + return; + } + + if (ipsec_rule->modify_hdr) + mlx5_modify_header_dealloc(mdev, ipsec_rule->modify_hdr); + + tx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.prio, pol_entry->attrs.type); +} + +void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) +{ + if (!ipsec->tx) + return; + + if (ipsec->roce) + mlx5_ipsec_fs_roce_cleanup(ipsec->roce); + + ipsec_fs_destroy_counters(ipsec); + mutex_destroy(&ipsec->tx->ft.mutex); + WARN_ON(ipsec->tx->ft.refcnt); + kfree(ipsec->tx); + + mutex_destroy(&ipsec->rx_ipv4->ft.mutex); + WARN_ON(ipsec->rx_ipv4->ft.refcnt); + kfree(ipsec->rx_ipv4); + + mutex_destroy(&ipsec->rx_ipv6->ft.mutex); + WARN_ON(ipsec->rx_ipv6->ft.refcnt); + kfree(ipsec->rx_ipv6); + + if (ipsec->is_uplink_rep) { + xa_destroy(&ipsec->rx_esw->ipsec_obj_id_map); + + mutex_destroy(&ipsec->tx_esw->ft.mutex); + WARN_ON(ipsec->tx_esw->ft.refcnt); + kfree(ipsec->tx_esw); + + mutex_destroy(&ipsec->rx_esw->ft.mutex); + WARN_ON(ipsec->rx_esw->ft.refcnt); + kfree(ipsec->rx_esw); + } +} + +int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) +{ + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_namespace *ns, *ns_esw; + int err = -ENOMEM; + + ns = mlx5_get_flow_namespace(ipsec->mdev, + MLX5_FLOW_NAMESPACE_EGRESS_IPSEC); + if (!ns) + return -EOPNOTSUPP; + + if (ipsec->is_uplink_rep) { + ns_esw = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns_esw) + return -EOPNOTSUPP; + + ipsec->tx_esw = kzalloc(sizeof(*ipsec->tx_esw), GFP_KERNEL); + if (!ipsec->tx_esw) + return -ENOMEM; + + ipsec->rx_esw = kzalloc(sizeof(*ipsec->rx_esw), GFP_KERNEL); + if (!ipsec->rx_esw) + goto err_rx_esw; + } + + ipsec->tx = kzalloc(sizeof(*ipsec->tx), GFP_KERNEL); + if (!ipsec->tx) + goto err_tx; + + ipsec->rx_ipv4 = kzalloc(sizeof(*ipsec->rx_ipv4), GFP_KERNEL); + if (!ipsec->rx_ipv4) + goto err_rx_ipv4; + + ipsec->rx_ipv6 = kzalloc(sizeof(*ipsec->rx_ipv6), GFP_KERNEL); + if (!ipsec->rx_ipv6) + goto err_rx_ipv6; + + err = ipsec_fs_init_counters(ipsec); + if (err) + goto err_counters; + + mutex_init(&ipsec->tx->ft.mutex); + mutex_init(&ipsec->rx_ipv4->ft.mutex); + mutex_init(&ipsec->rx_ipv6->ft.mutex); + ipsec->tx->ns = ns; + + if (ipsec->is_uplink_rep) { + mutex_init(&ipsec->tx_esw->ft.mutex); + mutex_init(&ipsec->rx_esw->ft.mutex); + ipsec->tx_esw->ns = ns_esw; + xa_init_flags(&ipsec->rx_esw->ipsec_obj_id_map, XA_FLAGS_ALLOC1); + } else if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ROCE) { + ipsec->roce = mlx5_ipsec_fs_roce_init(mdev); + } + + return 0; + +err_counters: + kfree(ipsec->rx_ipv6); +err_rx_ipv6: + kfree(ipsec->rx_ipv4); +err_rx_ipv4: + kfree(ipsec->tx); +err_tx: + kfree(ipsec->rx_esw); +err_rx_esw: + kfree(ipsec->tx_esw); + return err; +} + +void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec_sa_entry sa_entry_shadow = {}; + int err; + + memcpy(&sa_entry_shadow, sa_entry, sizeof(*sa_entry)); + memset(&sa_entry_shadow.ipsec_rule, 0x00, sizeof(sa_entry->ipsec_rule)); + + err = mlx5e_accel_ipsec_fs_add_rule(&sa_entry_shadow); + if (err) + return; + + mlx5e_accel_ipsec_fs_del_rule(sa_entry); + memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry)); +} + +bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5e_ipsec_rx *rx; + struct mlx5e_ipsec_tx *tx; + + rx = ipsec_rx(sa_entry->ipsec, attrs->family, attrs->type); + tx = ipsec_tx(sa_entry->ipsec, attrs->type); + if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) + return tx->allow_tunnel_mode; + + return rx->allow_tunnel_mode; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c new file mode 100644 index 0000000000..ce29e31721 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "en.h" +#include "ipsec.h" +#include "lib/crypto.h" +#include "fs_core.h" +#include "eswitch.h" + +enum { + MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, + MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET, +}; + +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + u32 caps = 0; + + if (!MLX5_CAP_GEN(mdev, ipsec_offload)) + return 0; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return 0; + + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) + return 0; + + if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) || + !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt)) + return 0; + + if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) || + !MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt)) + return 0; + + if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) && + MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp)) + caps |= MLX5_IPSEC_CAP_CRYPTO; + + if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) && + (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS || + (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS && + is_mdev_legacy_mode(mdev)))) { + if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, + reformat_add_esp_trasport) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + reformat_del_esp_trasport) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) + caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; + + if ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + caps |= MLX5_IPSEC_CAP_PRIO; + + if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, + reformat_l2_to_l3_esp_tunnel) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + reformat_l3_esp_tunnel_to_l2)) + caps |= MLX5_IPSEC_CAP_TUNNEL; + + if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, + reformat_add_esp_transport_over_udp) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + reformat_del_esp_transport_over_udp)) + caps |= MLX5_IPSEC_CAP_ESPINUDP; + } + + if (mlx5_get_roce_state(mdev) && + MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_RX_2_NIC_RX_RDMA && + MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_TX_RDMA_2_NIC_TX) + caps |= MLX5_IPSEC_CAP_ROCE; + + if (!caps) + return 0; + + if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) + caps |= MLX5_IPSEC_CAP_ESN; + + /* We can accommodate up to 2^24 different IPsec objects + * because we use up to 24 bit in flow table metadata + * to hold the IPsec Object unique handle. + */ + WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24); + return caps; +} +EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps); + +static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, + struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + void *aso_ctx; + + aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso); + if (attrs->replay_esn.trigger) { + MLX5_SET(ipsec_aso, aso_ctx, esn_event_arm, 1); + + if (attrs->dir == XFRM_DEV_OFFLOAD_IN) { + MLX5_SET(ipsec_aso, aso_ctx, window_sz, + attrs->replay_esn.replay_window); + MLX5_SET(ipsec_aso, aso_ctx, mode, + MLX5_IPSEC_ASO_REPLAY_PROTECTION); + } + MLX5_SET(ipsec_aso, aso_ctx, mode_parameter, + attrs->replay_esn.esn); + } + + /* ASO context */ + MLX5_SET(ipsec_obj, obj, ipsec_aso_access_pd, pdn); + MLX5_SET(ipsec_obj, obj, full_offload, 1); + MLX5_SET(ipsec_aso, aso_ctx, valid, 1); + /* MLX5_IPSEC_ASO_REG_C_4_5 is type C register that is used + * in flow steering to perform matching against. Please be + * aware that this register was chosen arbitrary and can't + * be used in other places as long as IPsec packet offload + * active. + */ + MLX5_SET(ipsec_obj, obj, aso_return_reg, MLX5_IPSEC_ASO_REG_C_4_5); + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) + MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN); + + if (attrs->lft.hard_packet_limit != XFRM_INF) { + MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt, + attrs->lft.hard_packet_limit); + MLX5_SET(ipsec_aso, aso_ctx, hard_lft_arm, 1); + } + + if (attrs->lft.soft_packet_limit != XFRM_INF) { + MLX5_SET(ipsec_aso, aso_ctx, remove_flow_soft_lft, + attrs->lft.soft_packet_limit); + + MLX5_SET(ipsec_aso, aso_ctx, soft_lft_arm, 1); + } +} + +static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 in[MLX5_ST_SZ_DW(create_ipsec_obj_in)] = {}; + void *obj, *salt_p, *salt_iv_p; + struct mlx5e_hw_objs *res; + int err; + + obj = MLX5_ADDR_OF(create_ipsec_obj_in, in, ipsec_object); + + /* salt and seq_iv */ + salt_p = MLX5_ADDR_OF(ipsec_obj, obj, salt); + memcpy(salt_p, &aes_gcm->salt, sizeof(aes_gcm->salt)); + + MLX5_SET(ipsec_obj, obj, icv_length, MLX5_IPSEC_OBJECT_ICV_LEN_16B); + salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv); + memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv)); + /* esn */ + if (attrs->replay_esn.trigger) { + MLX5_SET(ipsec_obj, obj, esn_en, 1); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb); + MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap); + } + + MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id); + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_IPSEC); + + res = &mdev->mlx5e_res.hw_objs; + if (attrs->type == XFRM_DEV_OFFLOAD_PACKET) + mlx5e_ipsec_packet_setup(obj, res->pdn, attrs); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + sa_entry->ipsec_obj_id = + MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void mlx5_destroy_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_IPSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct aes_gcm_keymat *aes_gcm = &sa_entry->attrs.aes_gcm; + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + int err; + + /* key */ + err = mlx5_create_encryption_key(mdev, aes_gcm->aes_key, + aes_gcm->key_len / BITS_PER_BYTE, + MLX5_ACCEL_OBJ_IPSEC_KEY, + &sa_entry->enc_key_id); + if (err) { + mlx5_core_dbg(mdev, "Failed to create encryption key (err = %d)\n", err); + return err; + } + + err = mlx5_create_ipsec_obj(sa_entry); + if (err) { + mlx5_core_dbg(mdev, "Failed to create IPsec object (err = %d)\n", err); + goto err_enc_key; + } + + return 0; + +err_enc_key: + mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id); + return err; +} + +void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + + mlx5_destroy_ipsec_obj(sa_entry); + mlx5_destroy_encryption_key(mdev, sa_entry->enc_key_id); +} + +static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + u32 in[MLX5_ST_SZ_DW(modify_ipsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(query_ipsec_obj_out)]; + u64 modify_field_select = 0; + u64 general_obj_types; + void *obj; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) + return -EINVAL; + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_IPSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sa_entry->ipsec_obj_id); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, "Query IPsec object failed (Object id %d), err = %d\n", + sa_entry->ipsec_obj_id, err); + return err; + } + + obj = MLX5_ADDR_OF(query_ipsec_obj_out, out, ipsec_object); + modify_field_select = MLX5_GET64(ipsec_obj, obj, modify_field_select); + + /* esn */ + if (!(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP) || + !(modify_field_select & MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB)) + return -EOPNOTSUPP; + + obj = MLX5_ADDR_OF(modify_ipsec_obj_in, in, ipsec_object); + MLX5_SET64(ipsec_obj, obj, modify_field_select, + MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP | + MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb); + MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap); + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + int err; + + err = mlx5_modify_ipsec_obj(sa_entry, attrs); + if (err) + return; + + memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs)); +} + +static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_wqe_aso_ctrl_seg *data) +{ + data->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6; + data->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | + MLX5_ASO_ALWAYS_TRUE << 4; + + mlx5e_ipsec_aso_query(sa_entry, data); +} + +static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, + u32 mode_param) +{ + struct mlx5_accel_esp_xfrm_attrs attrs = {}; + struct mlx5_wqe_aso_ctrl_seg data = {}; + + if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) { + sa_entry->esn_state.esn_msb++; + sa_entry->esn_state.overlap = 0; + } else { + sa_entry->esn_state.overlap = 1; + } + + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); + + /* It is safe to execute the modify below unlocked since the only flows + * that could affect this HW object, are create, destroy and this work. + * + * Creation flow can't co-exist with this modify work, the destruction + * flow would cancel this work, and this work is a single entity that + * can't conflict with it self. + */ + spin_unlock_bh(&sa_entry->x->lock); + mlx5_accel_esp_modify_xfrm(sa_entry, &attrs); + spin_lock_bh(&sa_entry->x->lock); + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + data.bitwise_data = cpu_to_be64(BIT_ULL(54)); + data.data_mask = data.bitwise_data; + + mlx5e_ipsec_aso_update(sa_entry, &data); +} + +static void mlx5e_ipsec_aso_update_hard(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_wqe_aso_ctrl_seg data = {}; + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + data.bitwise_data = cpu_to_be64(BIT_ULL(57) + BIT_ULL(31)); + data.data_mask = data.bitwise_data; + mlx5e_ipsec_aso_update(sa_entry, &data); +} + +static void mlx5e_ipsec_aso_update_soft(struct mlx5e_ipsec_sa_entry *sa_entry, + u32 val) +{ + struct mlx5_wqe_aso_ctrl_seg data = {}; + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET; + data.bitwise_data = cpu_to_be64(val); + data.data_mask = cpu_to_be64(U32_MAX); + mlx5e_ipsec_aso_update(sa_entry, &data); +} + +static void mlx5e_ipsec_handle_limits(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5e_ipsec_aso *aso = ipsec->aso; + bool soft_arm, hard_arm; + u64 hard_cnt; + + lockdep_assert_held(&sa_entry->x->lock); + + soft_arm = !MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm); + hard_arm = !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm); + if (!soft_arm && !hard_arm) + /* It is not lifetime event */ + return; + + hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt); + if (!hard_cnt || hard_arm) { + /* It is possible to see packet counter equal to zero without + * hard limit event armed. Such situation can be if packet + * decreased, while we handled soft limit event. + * + * However it will be HW/FW bug if hard limit event is raised + * and packet counter is not zero. + */ + WARN_ON_ONCE(hard_arm && hard_cnt); + + /* Notify about hard limit */ + xfrm_state_check_expire(sa_entry->x); + return; + } + + /* We are in soft limit event. */ + if (!sa_entry->limits.soft_limit_hit && + sa_entry->limits.round == attrs->lft.numb_rounds_soft) { + sa_entry->limits.soft_limit_hit = true; + /* Notify about soft limit */ + xfrm_state_check_expire(sa_entry->x); + + if (sa_entry->limits.round == attrs->lft.numb_rounds_hard) + goto hard; + + if (attrs->lft.soft_packet_limit > BIT_ULL(31)) { + /* We cannot avoid a soft_value that might have the high + * bit set. For instance soft_value=2^31+1 cannot be + * adjusted to the low bit clear version of soft_value=1 + * because it is too close to 0. + * + * Thus we have this corner case where we can hit the + * soft_limit with the high bit set, but cannot adjust + * the counter. Thus we set a temporary interrupt_value + * at least 2^30 away from here and do the adjustment + * then. + */ + mlx5e_ipsec_aso_update_soft(sa_entry, + BIT_ULL(31) - BIT_ULL(30)); + sa_entry->limits.fix_limit = true; + return; + } + + sa_entry->limits.fix_limit = true; + } + +hard: + if (sa_entry->limits.round == attrs->lft.numb_rounds_hard) { + mlx5e_ipsec_aso_update_soft(sa_entry, 0); + attrs->lft.soft_packet_limit = XFRM_INF; + return; + } + + mlx5e_ipsec_aso_update_hard(sa_entry); + sa_entry->limits.round++; + if (sa_entry->limits.round == attrs->lft.numb_rounds_soft) + mlx5e_ipsec_aso_update_soft(sa_entry, + attrs->lft.soft_packet_limit); + if (sa_entry->limits.fix_limit) { + sa_entry->limits.fix_limit = false; + mlx5e_ipsec_aso_update_soft(sa_entry, BIT_ULL(31) - 1); + } +} + +static void mlx5e_ipsec_handle_event(struct work_struct *_work) +{ + struct mlx5e_ipsec_work *work = + container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->data; + struct mlx5_accel_esp_xfrm_attrs *attrs; + struct mlx5e_ipsec_aso *aso; + int ret; + + aso = sa_entry->ipsec->aso; + attrs = &sa_entry->attrs; + + spin_lock_bh(&sa_entry->x->lock); + ret = mlx5e_ipsec_aso_query(sa_entry, NULL); + if (ret) + goto unlock; + + if (attrs->replay_esn.trigger && + !MLX5_GET(ipsec_aso, aso->ctx, esn_event_arm)) { + u32 mode_param = MLX5_GET(ipsec_aso, aso->ctx, mode_parameter); + + mlx5e_ipsec_update_esn_state(sa_entry, mode_param); + } + + if (attrs->lft.soft_packet_limit != XFRM_INF) + mlx5e_ipsec_handle_limits(sa_entry); + +unlock: + spin_unlock_bh(&sa_entry->x->lock); + kfree(work); +} + +static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event, + void *data) +{ + struct mlx5e_ipsec *ipsec = container_of(nb, struct mlx5e_ipsec, nb); + struct mlx5e_ipsec_sa_entry *sa_entry; + struct mlx5_eqe_obj_change *object; + struct mlx5e_ipsec_work *work; + struct mlx5_eqe *eqe = data; + u16 type; + + if (event != MLX5_EVENT_TYPE_OBJECT_CHANGE) + return NOTIFY_DONE; + + object = &eqe->data.obj_change; + type = be16_to_cpu(object->obj_type); + + if (type != MLX5_GENERAL_OBJECT_TYPES_IPSEC) + return NOTIFY_DONE; + + sa_entry = xa_load(&ipsec->sadb, be32_to_cpu(object->obj_id)); + if (!sa_entry) + return NOTIFY_DONE; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + + INIT_WORK(&work->work, mlx5e_ipsec_handle_event); + work->data = sa_entry; + + queue_work(ipsec->wq, &work->work); + return NOTIFY_OK; +} + +int mlx5e_ipsec_aso_init(struct mlx5e_ipsec *ipsec) +{ + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5e_ipsec_aso *aso; + struct mlx5e_hw_objs *res; + struct device *pdev; + int err; + + aso = kzalloc(sizeof(*ipsec->aso), GFP_KERNEL); + if (!aso) + return -ENOMEM; + + res = &mdev->mlx5e_res.hw_objs; + + pdev = mlx5_core_dma_dev(mdev); + aso->dma_addr = dma_map_single(pdev, aso->ctx, sizeof(aso->ctx), + DMA_BIDIRECTIONAL); + err = dma_mapping_error(pdev, aso->dma_addr); + if (err) + goto err_dma; + + aso->aso = mlx5_aso_create(mdev, res->pdn); + if (IS_ERR(aso->aso)) { + err = PTR_ERR(aso->aso); + goto err_aso_create; + } + + spin_lock_init(&aso->lock); + ipsec->nb.notifier_call = mlx5e_ipsec_event; + mlx5_notifier_register(mdev, &ipsec->nb); + + ipsec->aso = aso; + return 0; + +err_aso_create: + dma_unmap_single(pdev, aso->dma_addr, sizeof(aso->ctx), + DMA_BIDIRECTIONAL); +err_dma: + kfree(aso); + return err; +} + +void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec) +{ + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5e_ipsec_aso *aso; + struct device *pdev; + + aso = ipsec->aso; + pdev = mlx5_core_dma_dev(mdev); + + mlx5_notifier_unregister(mdev, &ipsec->nb); + mlx5_aso_destroy(aso->aso); + dma_unmap_single(pdev, aso->dma_addr, sizeof(aso->ctx), + DMA_BIDIRECTIONAL); + kfree(aso); + ipsec->aso = NULL; +} + +static void mlx5e_ipsec_aso_copy(struct mlx5_wqe_aso_ctrl_seg *ctrl, + struct mlx5_wqe_aso_ctrl_seg *data) +{ + if (!data) + return; + + ctrl->data_mask_mode = data->data_mask_mode; + ctrl->condition_1_0_operand = data->condition_1_0_operand; + ctrl->condition_1_0_offset = data->condition_1_0_offset; + ctrl->data_offset_condition_operand = data->data_offset_condition_operand; + ctrl->condition_0_data = data->condition_0_data; + ctrl->condition_0_mask = data->condition_0_mask; + ctrl->condition_1_data = data->condition_1_data; + ctrl->condition_1_mask = data->condition_1_mask; + ctrl->bitwise_data = data->bitwise_data; + ctrl->data_mask = data->data_mask; +} + +int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_wqe_aso_ctrl_seg *data) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5e_ipsec_aso *aso = ipsec->aso; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_wqe_aso_ctrl_seg *ctrl; + struct mlx5e_hw_objs *res; + struct mlx5_aso_wqe *wqe; + unsigned long expires; + u8 ds_cnt; + int ret; + + lockdep_assert_held(&sa_entry->x->lock); + res = &mdev->mlx5e_res.hw_objs; + + spin_lock_bh(&aso->lock); + memset(aso->ctx, 0, sizeof(aso->ctx)); + wqe = mlx5_aso_get_wqe(aso->aso); + ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + mlx5_aso_build_wqe(aso->aso, ds_cnt, wqe, sa_entry->ipsec_obj_id, + MLX5_ACCESS_ASO_OPC_MOD_IPSEC); + + ctrl = &wqe->aso_ctrl; + ctrl->va_l = + cpu_to_be32(lower_32_bits(aso->dma_addr) | ASO_CTRL_READ_EN); + ctrl->va_h = cpu_to_be32(upper_32_bits(aso->dma_addr)); + ctrl->l_key = cpu_to_be32(res->mkey); + mlx5e_ipsec_aso_copy(ctrl, data); + + mlx5_aso_post_wqe(aso->aso, false, &wqe->ctrl); + expires = jiffies + msecs_to_jiffies(10); + do { + ret = mlx5_aso_poll_cq(aso->aso, false); + if (ret) + /* We are in atomic context */ + udelay(10); + } while (ret && time_is_after_jiffies(expires)); + spin_unlock_bh(&aso->lock); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c new file mode 100644 index 0000000000..51a144246e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include +#include "ipsec.h" +#include "ipsec_rxtx.h" +#include "en.h" +#include "esw/ipsec_fs.h" + +enum { + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8, + MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9, +}; + +static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) +{ + unsigned int alen = crypto_aead_authsize(x->data); + struct ipv6hdr *ipv6hdr = ipv6_hdr(skb); + struct iphdr *ipv4hdr = ip_hdr(skb); + unsigned int trailer_len; + u8 plen; + int ret; + + ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1); + if (unlikely(ret)) + return ret; + + trailer_len = alen + plen + 2; + + ret = pskb_trim(skb, skb->len - trailer_len); + if (unlikely(ret)) + return ret; + if (skb->protocol == htons(ETH_P_IP)) { + ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); + ip_send_check(ipv4hdr); + } else { + ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) - + trailer_len); + } + return 0; +} + +static void mlx5e_ipsec_set_swp(struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, u8 mode, + struct xfrm_offload *xo) +{ + /* Tunnel Mode: + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP IP L4 + * + * Transport Mode: + * SWP: OutL3 OutL4 + * Pkt: MAC IP ESP L4 + * + * Tunnel(VXLAN TCP/UDP) over Transport Mode + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP UDP VXLAN IP L4 + */ + + /* Shared settings */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + + /* Tunnel mode */ + if (mode == XFRM_MODE_TUNNEL) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (xo->proto == IPPROTO_IPV6) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + + switch (xo->inner_ipproto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + /* IP | ESP | IP | [TCP | UDP] */ + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } + return; + } + + /* Transport mode */ + if (mode != XFRM_MODE_TRANSPORT) + return; + + if (!xo->inner_ipproto) { + switch (xo->proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + /* IP | ESP | TCP */ + eseg->swp_outer_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } + } else { + /* Tunnel(VXLAN TCP/UDP) over Transport Mode */ + switch (xo->inner_ipproto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + eseg->swp_inner_l4_offset = + (skb->csum_start + skb->head - skb->data) / 2; + if (inner_ip_hdr(skb)->version == 6) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + break; + default: + break; + } + } + +} + +void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo) +{ + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + __u32 oseq = replay_esn->oseq; + int iv_offset; + __be64 seqno; + u32 seq_hi; + + if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID && + MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) { + seq_hi = xo->seq.hi - 1; + } else { + seq_hi = xo->seq.hi; + } + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo) +{ + int iv_offset; + __be64 seqno; + + /* Place the SN in the IV field */ + seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr); + skb_store_bits(skb, iv_offset, &seqno, 8); +} + +void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_ipsec_state *ipsec_st, + struct mlx5_wqe_inline_seg *inlseg) +{ + inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG); + esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto); +} + +static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct xfrm_state *x, + struct xfrm_offload *xo, + struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + unsigned int blksize, clen, alen, plen; + struct crypto_aead *aead; + unsigned int tailen; + + ipsec_st->x = x; + ipsec_st->xo = xo; + aead = x->data; + alen = crypto_aead_authsize(aead); + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + clen = ALIGN(skb->len + 2, blksize); + plen = max_t(u32, clen - skb->len, 4); + tailen = plen + alen; + ipsec_st->plen = plen; + ipsec_st->tailen = tailen; + + return 0; +} + +void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + struct xfrm_encap_tmpl *encap; + struct xfrm_state *x; + struct sec_path *sp; + u8 l3_proto; + + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) + return; + + x = xfrm_input_state(skb); + if (unlikely(!x)) + return; + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) + return; + + mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo); + + l3_proto = (x->props.family == AF_INET) ? + ((struct iphdr *)skb_network_header(skb))->protocol : + ((struct ipv6hdr *)skb_network_header(skb))->nexthdr; + + eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); + eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER); + encap = x->encap; + if (!encap) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); + } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC); + } +} + +bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct xfrm_offload *xo = xfrm_offload(skb); + struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_state *x; + struct sec_path *sp; + + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); + goto drop; + } + + x = xfrm_input_state(skb); + if (unlikely(!x)) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state); + goto drop; + } + + if (unlikely(!x->xso.offload_handle || + (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip); + goto drop; + } + + if (!skb_is_gso(skb)) + if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) { + atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer); + goto drop; + } + + sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; + sa_entry->set_iv_op(skb, x, xo); + mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st); + + return true; + +drop: + kfree_skb(skb); + return false; +} + +enum { + MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED, + MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED, + MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER, +}; + +void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + u32 ipsec_meta_data) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ipsec *ipsec = priv->ipsec; + struct mlx5e_ipsec_sa_entry *sa_entry; + struct xfrm_offload *xo; + struct sec_path *sp; + u32 sa_handle; + + sa_handle = MLX5_IPSEC_METADATA_HANDLE(ipsec_meta_data); + sp = secpath_set(skb); + if (unlikely(!sp)) { + atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sp_alloc); + return; + } + + rcu_read_lock(); + sa_entry = xa_load(&ipsec->sadb, sa_handle); + if (unlikely(!sa_entry)) { + rcu_read_unlock(); + atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + return; + } + xfrm_state_hold(sa_entry->x); + rcu_read_unlock(); + + sp->xvec[sp->len++] = sa_entry->x; + sp->olen++; + + xo = xfrm_offload(skb); + xo->flags = CRYPTO_DONE; + + switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) { + case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED: + xo->status = CRYPTO_SUCCESS; + break; + case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED: + xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; + break; + case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_BAD_TRAILER: + xo->status = CRYPTO_INVALID_PACKET_SYNTAX; + break; + default: + atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_syndrome); + } +} + +int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metadata) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + u32 ipsec_obj_id; + int err; + + if (!ipsec || !ipsec->is_uplink_rep) + return -EINVAL; + + err = mlx5_esw_ipsec_rx_ipsec_obj_id_search(priv, id, &ipsec_obj_id); + if (err) { + atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + return err; + } + + *metadata = MLX5_IPSEC_METADATA_CREATE(ipsec_obj_id, + MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h new file mode 100644 index 0000000000..9ee014a8ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5E_IPSEC_RXTX_H__ +#define __MLX5E_IPSEC_RXTX_H__ + +#include +#include +#include "en.h" +#include "en/txrx.h" + +/* Bit31: IPsec marker, Bit30: reserved, Bit29-24: IPsec syndrome, Bit23-0: IPsec obj id */ +#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1) +#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0)) +#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0)) +#define MLX5_IPSEC_METADATA_CREATE(id, syndrome) ((id) | ((syndrome) << 24)) + +struct mlx5e_accel_tx_ipsec_state { + struct xfrm_offload *xo; + struct xfrm_state *x; + u32 tailen; + u32 plen; +}; + +#ifdef CONFIG_MLX5_EN_IPSEC + +void mlx5e_ipsec_inverse_table_init(void); +void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); +void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_offload *xo); +bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_ipsec_state *ipsec_st); +void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_ipsec_state *ipsec_st, + struct mlx5_wqe_inline_seg *inlseg); +void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + u32 ipsec_meta_data); +int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metadata); +static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st) +{ + return ipsec_st->tailen; +} + +static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) +{ + return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); +} + +static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) +{ + return eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); +} + +void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg); + +static inline netdev_features_t +mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + struct sec_path *sp = skb_sec_path(skb); + + if (sp && sp->len && xo) { + struct xfrm_state *x = sp->xvec[0]; + + if (!x || !x->xso.offload_handle) + goto out_disable; + + if (xo->inner_ipproto) { + /* Cannot support tunnel packet over IPsec tunnel mode + * because we cannot offload three IP header csum + */ + if (x->props.mode == XFRM_MODE_TUNNEL) + goto out_disable; + + /* Only support UDP or TCP L4 checksum */ + if (xo->inner_ipproto != IPPROTO_UDP && + xo->inner_ipproto != IPPROTO_TCP) + goto out_disable; + } + + return features; + + } + + /* Disable CSUM and GSO for software IPsec */ +out_disable: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +static inline bool +mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + u8 inner_ipproto; + + if (!mlx5e_ipsec_eseg_meta(eseg)) + return false; + + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + inner_ipproto = xfrm_offload(skb)->inner_ipproto; + if (inner_ipproto) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial_inner++; + } + + return true; +} +#else +static inline +void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + u32 ipsec_meta_data) +{} + +static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) +{ + return false; +} + +static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; } +static inline netdev_features_t +mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features) +{ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } + +static inline bool +mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + return false; +} +#endif /* CONFIG_MLX5_EN_IPSEC */ + +#endif /* __MLX5E_IPSEC_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c new file mode 100644 index 0000000000..e0e36a0972 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include + +#include "en.h" +#include "ipsec.h" + +static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_drop_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_drop_bytes) }, +}; + +static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc) +#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw) +{ + if (!priv->ipsec) + return 0; + + return NUM_IPSEC_HW_COUNTERS; +} + +static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw) {} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw) +{ + unsigned int i; + + if (!priv->ipsec) + return idx; + + for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_hw_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw) +{ + int i; + + if (!priv->ipsec) + return idx; + + mlx5e_accel_ipsec_fs_read_stats(priv, &priv->ipsec->hw_stats); + for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->hw_stats, + mlx5e_ipsec_hw_stats_desc, i); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw) +{ + return priv->ipsec ? NUM_IPSEC_SW_COUNTERS : 0; +} + +static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_sw) {} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_sw) +{ + unsigned int i; + + if (priv->ipsec) + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ipsec_sw_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) +{ + int i; + + if (priv->ipsec) + for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats, + mlx5e_ipsec_sw_stats_desc, i); + return idx; +} + +MLX5E_DEFINE_STATS_GRP(ipsec_hw, 0); +MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c new file mode 100644 index 0000000000..984fa04bd3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include +#include "en.h" +#include "lib/mlx5.h" +#include "lib/crypto.h" +#include "en_accel/ktls.h" +#include "en_accel/ktls_utils.h" +#include "en_accel/fs_tcp.h" + +struct mlx5_crypto_dek *mlx5_ktls_create_key(struct mlx5_crypto_dek_pool *dek_pool, + struct tls_crypto_info *crypto_info) +{ + const void *key; + u32 sz_bytes; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + default: + return ERR_PTR(-EINVAL); + } + + return mlx5_crypto_dek_create(dek_pool, key, sz_bytes); +} + +void mlx5_ktls_destroy_key(struct mlx5_crypto_dek_pool *dek_pool, + struct mlx5_crypto_dek *dek) +{ + mlx5_crypto_dek_destroy(dek_pool, dek); +} + +static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!mlx5e_ktls_type_check(mdev, crypto_info)) + return -EOPNOTSUPP; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + err = mlx5e_ktls_add_tx(netdev, sk, crypto_info, start_offload_tcp_sn); + else + err = mlx5e_ktls_add_rx(netdev, sk, crypto_info, start_offload_tcp_sn); + + return err; +} + +static void mlx5e_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + mlx5e_ktls_del_tx(netdev, tls_ctx); + else + mlx5e_ktls_del_rx(netdev, tls_ctx); +} + +static int mlx5e_ktls_resync(struct net_device *netdev, + struct sock *sk, u32 seq, u8 *rcd_sn, + enum tls_offload_ctx_dir direction) +{ + if (unlikely(direction != TLS_OFFLOAD_CTX_DIR_RX)) + return -EOPNOTSUPP; + + mlx5e_ktls_rx_resync(netdev, sk, seq, rcd_sn); + return 0; +} + +static const struct tlsdev_ops mlx5e_ktls_ops = { + .tls_dev_add = mlx5e_ktls_add, + .tls_dev_del = mlx5e_ktls_del, + .tls_dev_resync = mlx5e_ktls_resync, +}; + +bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) +{ + u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + + if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx)) + return false; + + /* Check the possibility to post the required ICOSQ WQEs. */ + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS)) + return false; + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS)) + return false; + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_KTLS_GET_PROGRESS_WQEBBS)) + return false; + + return true; +} + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!mlx5e_is_ktls_tx(mdev) && !mlx5e_is_ktls_rx(mdev)) + return; + + if (mlx5e_is_ktls_tx(mdev)) { + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + } + + if (mlx5e_is_ktls_rx(mdev)) + netdev->hw_features |= NETIF_F_HW_TLS_RX; + + netdev->tlsdev_ops = &mlx5e_ktls_ops; +} + +int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + mutex_lock(&priv->state_lock); + if (enable) + err = mlx5e_accel_fs_tcp_create(priv->fs); + else + mlx5e_accel_fs_tcp_destroy(priv->fs); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) +{ + int err; + + if (!mlx5e_is_ktls_rx(priv->mdev)) + return 0; + + priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx"); + if (!priv->tls->rx_wq) + return -ENOMEM; + + if (priv->netdev->features & NETIF_F_HW_TLS_RX) { + err = mlx5e_accel_fs_tcp_create(priv->fs); + if (err) { + destroy_workqueue(priv->tls->rx_wq); + return err; + } + } + + return 0; +} + +void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_rx(priv->mdev)) + return; + + if (priv->netdev->features & NETIF_F_HW_TLS_RX) + mlx5e_accel_fs_tcp_destroy(priv->fs); + + destroy_workqueue(priv->tls->rx_wq); +} + +static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tls->debugfs.dfs = debugfs_create_dir("tls", dfs_root); +} + +int mlx5e_ktls_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + tls = kzalloc(sizeof(*tls), GFP_KERNEL); + if (!tls) + return -ENOMEM; + tls->mdev = priv->mdev; + + priv->tls = tls; + + mlx5e_tls_debugfs_init(tls, priv->dfs_root); + + return 0; +} + +void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls = priv->tls; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return; + + debugfs_remove_recursive(tls->debugfs.dfs); + tls->debugfs.dfs = NULL; + + kfree(priv->tls); + priv->tls = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h new file mode 100644 index 0000000000..f11075e676 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5E_KTLS_H__ +#define __MLX5E_KTLS_H__ + +#include +#include +#include +#include "en.h" + +#ifdef CONFIG_MLX5_EN_TLS +#include "lib/crypto.h" + +struct mlx5_crypto_dek *mlx5_ktls_create_key(struct mlx5_crypto_dek_pool *dek_pool, + struct tls_crypto_info *crypto_info); +void mlx5_ktls_destroy_key(struct mlx5_crypto_dek_pool *dek_pool, + struct mlx5_crypto_dek *dek); + +static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev) +{ + if (is_kdump_kernel()) + return false; + + if (!MLX5_CAP_GEN(mdev, tls_tx) && !MLX5_CAP_GEN(mdev, tls_rx)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + return (MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128) || + MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256)); +} + +static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) +{ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); + break; + case TLS_CIPHER_AES_GCM_256: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_256); + break; + } + + return false; +} + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv); +int mlx5e_ktls_init_rx(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv); +int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable); +struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void); +void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list); + +static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev) +{ + return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx); +} + +bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev); + +struct mlx5e_tls_sw_stats { + atomic64_t tx_tls_ctx; + atomic64_t tx_tls_del; + atomic64_t tx_tls_pool_alloc; + atomic64_t tx_tls_pool_free; + atomic64_t rx_tls_ctx; + atomic64_t rx_tls_del; +}; + +struct mlx5e_tls_debugfs { + struct dentry *dfs; + struct dentry *dfs_tx; +}; + +struct mlx5e_tls { + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats sw_stats; + struct workqueue_struct *rx_wq; + struct mlx5e_tls_tx_pool *tx_pool; + struct mlx5_crypto_dek_pool *dek_pool; + struct mlx5e_tls_debugfs debugfs; +}; + +int mlx5e_ktls_init(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup(struct mlx5e_priv *priv); + +int mlx5e_ktls_get_count(struct mlx5e_priv *priv); +int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data); +int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data); + +#else +static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) +{ +} + +static inline int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable) +{ + netdev_warn(netdev, "kTLS is not supported\n"); + return -EOPNOTSUPP; +} + +static inline struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {} + +static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) +{ + return false; +} + +static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { } +static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; } +static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + return 0; +} + +static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + return 0; +} +#endif + +#endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c new file mode 100644 index 0000000000..9b597cb245 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -0,0 +1,785 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include +#include "en_accel/en_accel.h" +#include "en_accel/ktls.h" +#include "en_accel/ktls_txrx.h" +#include "en_accel/ktls_utils.h" +#include "en_accel/fs_tcp.h" + +struct accel_rule { + struct work_struct work; + struct mlx5e_priv *priv; + struct mlx5_flow_handle *rule; +}; + +#define PROGRESS_PARAMS_WRITE_UNIT 64 +#define PROGRESS_PARAMS_PADDED_SIZE \ + (ALIGN(sizeof(struct mlx5_wqe_tls_progress_params_seg), \ + PROGRESS_PARAMS_WRITE_UNIT)) + +struct mlx5e_ktls_rx_resync_buf { + union { + struct mlx5_wqe_tls_progress_params_seg progress; + u8 pad[PROGRESS_PARAMS_PADDED_SIZE]; + } ____cacheline_aligned_in_smp; + dma_addr_t dma_addr; + struct mlx5e_ktls_offload_context_rx *priv_rx; +}; + +enum { + MLX5E_PRIV_RX_FLAG_DELETING, + MLX5E_NUM_PRIV_RX_FLAGS, +}; + +struct mlx5e_ktls_rx_resync_ctx { + struct tls_offload_resync_async core; + struct work_struct work; + struct mlx5e_priv *priv; + refcount_t refcnt; + __be64 sw_rcd_sn_be; + u32 seq; +}; + +struct mlx5e_ktls_offload_context_rx { + union mlx5e_crypto_info crypto_info; + struct accel_rule rule; + struct sock *sk; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_tls_sw_stats *sw_stats; + struct completion add_ctx; + struct mlx5e_tir tir; + struct mlx5_crypto_dek *dek; + u32 rxq; + DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS); + + /* resync */ + spinlock_t lock; /* protects resync fields */ + struct mlx5e_ktls_rx_resync_ctx resync; + struct list_head list; +}; + +static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + if (!refcount_dec_and_test(&priv_rx->resync.refcnt)) + return false; + + kfree(priv_rx); + return true; +} + +static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + refcount_inc(&priv_rx->resync.refcnt); +} + +struct mlx5e_ktls_resync_resp { + /* protects list changes */ + spinlock_t lock; + struct list_head list; +}; + +void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) +{ + kvfree(resp_list); +} + +struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void) +{ + struct mlx5e_ktls_resync_resp *resp_list; + + resp_list = kvzalloc(sizeof(*resp_list), GFP_KERNEL); + if (!resp_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&resp_list->list); + spin_lock_init(&resp_list->lock); + + return resp_list; +} + +static void accel_rule_handle_work(struct work_struct *work) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct accel_rule *accel_rule; + struct mlx5_flow_handle *rule; + + accel_rule = container_of(work, struct accel_rule, work); + priv_rx = container_of(accel_rule, struct mlx5e_ktls_offload_context_rx, rule); + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + goto out; + + rule = mlx5e_accel_fs_add_sk(accel_rule->priv->fs, priv_rx->sk, + mlx5e_tir_get_tirn(&priv_rx->tir), + MLX5_FS_DEFAULT_FLOW_TAG); + if (!IS_ERR_OR_NULL(rule)) + accel_rule->rule = rule; +out: + complete(&priv_rx->add_ctx); +} + +static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv) +{ + INIT_WORK(&rule->work, accel_rule_handle_work); + rule->priv = priv; +} + +static void icosq_fill_wi(struct mlx5e_icosq *sq, u16 pi, + struct mlx5e_icosq_wqe_info *wi) +{ + sq->db.wqe_info[pi] = *wi; +} + +static struct mlx5_wqe_ctrl_seg * +post_static_params(struct mlx5e_icosq *sq, + struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + struct mlx5e_set_tls_static_params_wqe *wqe; + struct mlx5e_icosq_wqe_info wi; + u16 pi, num_wqebbs; + + num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS; + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) + return ERR_PTR(-ENOSPC); + + pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); + wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); + mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info, + mlx5e_tir_get_tirn(&priv_rx->tir), + mlx5_crypto_dek_get_id(priv_rx->dek), + priv_rx->resync.seq, false, + TLS_OFFLOAD_CTX_DIR_RX); + wi = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS, + .num_wqebbs = num_wqebbs, + .tls_set_params.priv_rx = priv_rx, + }; + icosq_fill_wi(sq, pi, &wi); + sq->pc += num_wqebbs; + + return &wqe->ctrl; +} + +static struct mlx5_wqe_ctrl_seg * +post_progress_params(struct mlx5e_icosq *sq, + struct mlx5e_ktls_offload_context_rx *priv_rx, + u32 next_record_tcp_sn) +{ + struct mlx5e_set_tls_progress_params_wqe *wqe; + struct mlx5e_icosq_wqe_info wi; + u16 pi, num_wqebbs; + + num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS; + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) + return ERR_PTR(-ENOSPC); + + pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); + wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi); + mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, + mlx5e_tir_get_tirn(&priv_rx->tir), + false, next_record_tcp_sn, + TLS_OFFLOAD_CTX_DIR_RX); + wi = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_TLS, + .num_wqebbs = num_wqebbs, + .tls_set_params.priv_rx = priv_rx, + }; + + icosq_fill_wi(sq, pi, &wi); + sq->pc += num_wqebbs; + + return &wqe->ctrl; +} + +static int post_rx_param_wqes(struct mlx5e_channel *c, + struct mlx5e_ktls_offload_context_rx *priv_rx, + u32 next_record_tcp_sn) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_icosq *sq; + int err; + + err = 0; + sq = &c->async_icosq; + spin_lock_bh(&c->async_icosq_lock); + + cseg = post_static_params(sq, priv_rx); + if (IS_ERR(cseg)) + goto err_out; + cseg = post_progress_params(sq, priv_rx, next_record_tcp_sn); + if (IS_ERR(cseg)) + goto err_out; + + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); +unlock: + spin_unlock_bh(&c->async_icosq_lock); + + return err; + +err_out: + priv_rx->rq_stats->tls_resync_req_skip++; + err = PTR_ERR(cseg); + complete(&priv_rx->add_ctx); + goto unlock; +} + +static void +mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx, + struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + struct mlx5e_ktls_offload_context_rx **ctx = + __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); + + BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX); + + *ctx = priv_rx; +} + +static struct mlx5e_ktls_offload_context_rx * +mlx5e_get_ktls_rx_priv_ctx(struct tls_context *tls_ctx) +{ + struct mlx5e_ktls_offload_context_rx **ctx = + __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); + + return *ctx; +} + +/* Re-sync */ +/* Runs in work context */ +static int +resync_post_get_progress_params(struct mlx5e_icosq *sq, + struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + struct mlx5e_get_tls_progress_params_wqe *wqe; + struct mlx5e_ktls_rx_resync_buf *buf; + struct mlx5e_icosq_wqe_info wi; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_seg_get_psv *psv; + struct device *pdev; + int err; + u16 pi; + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (unlikely(!buf)) { + err = -ENOMEM; + goto err_out; + } + + pdev = mlx5_core_dma_dev(sq->channel->priv->mdev); + buf->dma_addr = dma_map_single(pdev, &buf->progress, + PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) { + err = -ENOMEM; + goto err_free; + } + + buf->priv_rx = priv_rx; + + spin_lock_bh(&sq->channel->async_icosq_lock); + + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) { + spin_unlock_bh(&sq->channel->async_icosq_lock); + err = -ENOSPC; + goto err_dma_unmap; + } + + pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS); + wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi); + +#define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)) + + cseg = &wqe->ctrl; + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_GET_PSV | + (MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS << 24)); + cseg->qpn_ds = + cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | GET_PSV_DS_CNT); + + psv = &wqe->psv; + psv->num_psv = 1 << 4; + psv->l_key = sq->channel->mkey_be; + psv->psv_index[0] = cpu_to_be32(mlx5e_tir_get_tirn(&priv_rx->tir)); + psv->va = cpu_to_be64(buf->dma_addr); + + wi = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS, + .num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS, + .tls_get_params.buf = buf, + }; + icosq_fill_wi(sq, pi, &wi); + sq->pc++; + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + spin_unlock_bh(&sq->channel->async_icosq_lock); + + return 0; + +err_dma_unmap: + dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); +err_free: + kfree(buf); +err_out: + priv_rx->rq_stats->tls_resync_req_skip++; + return err; +} + +/* Function is called with elevated refcount. + * It decreases it only if no WQE is posted. + */ +static void resync_handle_work(struct work_struct *work) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + struct mlx5e_channel *c; + struct mlx5e_icosq *sq; + + resync = container_of(work, struct mlx5e_ktls_rx_resync_ctx, work); + priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync); + + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + mlx5e_ktls_priv_rx_put(priv_rx); + return; + } + + c = resync->priv->channels.c[priv_rx->rxq]; + sq = &c->async_icosq; + + if (resync_post_get_progress_params(sq, priv_rx)) + mlx5e_ktls_priv_rx_put(priv_rx); +} + +static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, + struct mlx5e_priv *priv) +{ + INIT_WORK(&resync->work, resync_handle_work); + resync->priv = priv; + refcount_set(&resync->refcnt, 1); +} + +/* Function can be called with the refcount being either elevated or not. + * It does not affect the refcount. + */ +static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx, + struct mlx5e_channel *c) +{ + struct mlx5e_ktls_resync_resp *ktls_resync; + struct mlx5e_icosq *sq; + bool trigger_poll; + + sq = &c->async_icosq; + ktls_resync = sq->ktls_resync; + trigger_poll = false; + + spin_lock_bh(&ktls_resync->lock); + spin_lock_bh(&priv_rx->lock); + switch (priv_rx->crypto_info.crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + &priv_rx->crypto_info.crypto_info_128; + + memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, + sizeof(info->rec_seq)); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + &priv_rx->crypto_info.crypto_info_256; + + memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, + sizeof(info->rec_seq)); + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + priv_rx->crypto_info.crypto_info.cipher_type); + spin_unlock_bh(&priv_rx->lock); + spin_unlock_bh(&ktls_resync->lock); + return; + } + + if (list_empty(&priv_rx->list)) { + list_add_tail(&priv_rx->list, &ktls_resync->list); + trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + } + spin_unlock_bh(&priv_rx->lock); + spin_unlock_bh(&ktls_resync->lock); + + if (!trigger_poll) + return; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock_bh(&c->async_icosq_lock); + mlx5e_trigger_irq(sq); + spin_unlock_bh(&c->async_icosq_lock); + } +} + +/* Function can be called with the refcount being either elevated or not. + * It decreases the refcount and may free the kTLS priv context. + * Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was + * already in flight. + */ +void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, + struct mlx5e_icosq *sq) +{ + struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + u8 tracker_state, auth_state, *ctx; + struct device *dev; + u32 hw_seq; + + priv_rx = buf->priv_rx; + resync = &priv_rx->resync; + dev = mlx5_core_dma_dev(resync->priv->mdev); + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + goto out; + + dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, + DMA_FROM_DEVICE); + + ctx = buf->progress.ctx; + tracker_state = MLX5_GET(tls_progress_params, ctx, record_tracker_state); + auth_state = MLX5_GET(tls_progress_params, ctx, auth_state); + if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || + auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { + priv_rx->rq_stats->tls_resync_req_skip++; + goto out; + } + + hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); + tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); + priv_rx->rq_stats->tls_resync_req_end++; +out: + mlx5e_ktls_priv_rx_put(priv_rx); + dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); + kfree(buf); +} + +/* Runs in NAPI. + * Function elevates the refcount, unless no work is queued. + */ +static bool resync_queue_get_psv(struct sock *sk) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + + priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_get_ctx(sk)); + if (unlikely(!priv_rx)) + return false; + + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + return false; + + resync = &priv_rx->resync; + mlx5e_ktls_priv_rx_get(priv_rx); + if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) + mlx5e_ktls_priv_rx_put(priv_rx); + + return true; +} + +/* Runs in NAPI */ +static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct net_device *netdev = rq->netdev; + struct net *net = dev_net(netdev); + struct sock *sk = NULL; + unsigned int datalen; + struct iphdr *iph; + struct tcphdr *th; + __be32 seq; + int depth = 0; + + __vlan_get_protocol(skb, eth->h_proto, &depth); + iph = (struct iphdr *)(skb->data + depth); + + if (iph->version == 4) { + depth += sizeof(struct iphdr); + th = (void *)iph + sizeof(struct iphdr); + + sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + iph->saddr, th->source, iph->daddr, + th->dest, netdev->ifindex); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph; + + depth += sizeof(struct ipv6hdr); + th = (void *)ipv6h + sizeof(struct ipv6hdr); + + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->dest), + netdev->ifindex, 0); +#endif + } + + depth += sizeof(struct tcphdr); + + if (unlikely(!sk)) + return; + + if (unlikely(sk->sk_state == TCP_TIME_WAIT)) + goto unref; + + if (unlikely(!resync_queue_get_psv(sk))) + goto unref; + + seq = th->seq; + datalen = skb->len - depth; + tls_offload_rx_resync_async_request_start(sk, seq, datalen); + rq->stats->tls_resync_req_start++; + +unref: + sock_gen_put(sk); +} + +void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, + u32 seq, u8 *rcd_sn) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + struct mlx5e_priv *priv; + struct mlx5e_channel *c; + + priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_get_ctx(sk)); + if (unlikely(!priv_rx)) + return; + + resync = &priv_rx->resync; + resync->sw_rcd_sn_be = *(__be64 *)rcd_sn; + resync->seq = seq; + + priv = netdev_priv(netdev); + c = priv->channels.c[priv_rx->rxq]; + + resync_handle_seq_match(priv_rx, c); +} + +/* End of resync section */ + +void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, + struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) +{ + struct mlx5e_rq_stats *stats = rq->stats; + + switch (get_cqe_tls_offload(cqe)) { + case CQE_TLS_OFFLOAD_DECRYPTED: + skb->decrypted = 1; + stats->tls_decrypted_packets++; + stats->tls_decrypted_bytes += *cqe_bcnt; + break; + case CQE_TLS_OFFLOAD_RESYNC: + stats->tls_resync_req_pkt++; + resync_update_sn(rq, skb); + break; + default: /* CQE_TLS_OFFLOAD_ERROR: */ + stats->tls_err++; + break; + } +} + +void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx = wi->tls_set_params.priv_rx; + struct accel_rule *rule = &priv_rx->rule; + + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + complete(&priv_rx->add_ctx); + return; + } + queue_work(rule->priv->tls->rx_wq, &rule->work); +} + +static int mlx5e_ktls_sk_get_rxq(struct sock *sk) +{ + int rxq = sk_rx_queue_get(sk); + + if (unlikely(rxq == -1)) + rxq = 0; + + return rxq; +} + +int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + struct tls_context *tls_ctx; + struct mlx5_crypto_dek *dek; + struct mlx5e_priv *priv; + int rxq, err; + + tls_ctx = tls_get_ctx(sk); + priv = netdev_priv(netdev); + priv_rx = kzalloc(sizeof(*priv_rx), GFP_KERNEL); + if (unlikely(!priv_rx)) + return -ENOMEM; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + priv_rx->crypto_info.crypto_info_128 = + *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + break; + case TLS_CIPHER_AES_GCM_256: + priv_rx->crypto_info.crypto_info_256 = + *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + break; + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->cipher_type); + err = -EOPNOTSUPP; + goto err_cipher_type; + } + + dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); + if (IS_ERR(dek)) { + err = PTR_ERR(dek); + goto err_cipher_type; + } + priv_rx->dek = dek; + + INIT_LIST_HEAD(&priv_rx->list); + spin_lock_init(&priv_rx->lock); + + rxq = mlx5e_ktls_sk_get_rxq(sk); + priv_rx->rxq = rxq; + priv_rx->sk = sk; + + priv_rx->rq_stats = &priv->channel_stats[rxq]->rq; + priv_rx->sw_stats = &priv->tls->sw_stats; + mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); + + err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir); + if (err) + goto err_create_tir; + + init_completion(&priv_rx->add_ctx); + + accel_rule_init(&priv_rx->rule, priv); + resync = &priv_rx->resync; + resync_init(resync, priv); + tls_offload_ctx_rx(tls_ctx)->resync_async = &resync->core; + tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC); + + err = post_rx_param_wqes(priv->channels.c[rxq], priv_rx, start_offload_tcp_sn); + if (err) + goto err_post_wqes; + + atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx); + + return 0; + +err_post_wqes: + mlx5e_tir_destroy(&priv_rx->tir); +err_create_tir: + mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_rx->dek); +err_cipher_type: + kfree(priv_rx); + return err; +} + +void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_ctx *resync; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + + priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx); + set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags); + mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL); + synchronize_net(); /* Sync with NAPI */ + if (!cancel_work_sync(&priv_rx->rule.work)) + /* completion is needed, as the priv_rx in the add flow + * is maintained on the wqe info (wi), not on the socket. + */ + wait_for_completion(&priv_rx->add_ctx); + resync = &priv_rx->resync; + if (cancel_work_sync(&resync->work)) + mlx5e_ktls_priv_rx_put(priv_rx); + + atomic64_inc(&priv_rx->sw_stats->rx_tls_del); + if (priv_rx->rule.rule) + mlx5e_accel_fs_del_sk(priv_rx->rule.rule); + + mlx5e_tir_destroy(&priv_rx->tir); + mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_rx->dek); + /* priv_rx should normally be freed here, but if there is an outstanding + * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is + * processed. + */ + mlx5e_ktls_priv_rx_put(priv_rx); +} + +bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx, *tmp; + struct mlx5e_ktls_resync_resp *ktls_resync; + struct mlx5_wqe_ctrl_seg *db_cseg; + struct mlx5e_icosq *sq; + LIST_HEAD(local_list); + int i, j; + + sq = &c->async_icosq; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + ktls_resync = sq->ktls_resync; + db_cseg = NULL; + i = 0; + + spin_lock(&ktls_resync->lock); + list_for_each_entry_safe(priv_rx, tmp, &ktls_resync->list, list) { + list_move(&priv_rx->list, &local_list); + if (++i == budget) + break; + } + if (list_empty(&ktls_resync->list)) + clear_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock(&ktls_resync->lock); + + spin_lock(&c->async_icosq_lock); + for (j = 0; j < i; j++) { + struct mlx5_wqe_ctrl_seg *cseg; + + priv_rx = list_first_entry(&local_list, + struct mlx5e_ktls_offload_context_rx, + list); + spin_lock(&priv_rx->lock); + cseg = post_static_params(sq, priv_rx); + if (IS_ERR(cseg)) { + spin_unlock(&priv_rx->lock); + break; + } + list_del_init(&priv_rx->list); + spin_unlock(&priv_rx->lock); + db_cseg = cseg; + } + if (db_cseg) + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, db_cseg); + spin_unlock(&c->async_icosq_lock); + + priv_rx->rq_stats->tls_resync_res_ok += j; + + if (!list_empty(&local_list)) { + /* This happens only if ICOSQ is full. + * There is no need to mark busy or explicitly ask for a NAPI cycle, + * it will be triggered by the outstanding ICOSQ completions. + */ + spin_lock(&ktls_resync->lock); + list_splice(&local_list, &ktls_resync->list); + set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock(&ktls_resync->lock); + priv_rx->rq_stats->tls_resync_res_retry++; + } + + return i == budget; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c new file mode 100644 index 0000000000..7c1c0eb167 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include + +#include "en.h" +#include "fpga/sdk.h" +#include "en_accel/ktls.h" + +static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) }, +}; + +#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ + atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) + +int mlx5e_ktls_get_count(struct mlx5e_priv *priv) +{ + if (!priv->tls) + return 0; + + return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); +} + +int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + unsigned int i, n, idx = 0; + + if (!priv->tls) + return 0; + + n = mlx5e_ktls_get_count(priv); + + for (i = 0; i < n; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_ktls_sw_stats_desc[i].format); + + return n; +} + +int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + unsigned int i, n, idx = 0; + + if (!priv->tls) + return 0; + + n = mlx5e_ktls_get_count(priv); + + for (i = 0; i < n; i++) + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, + mlx5e_ktls_sw_stats_desc, + i); + + return n; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c new file mode 100644 index 0000000000..d61be26a4d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -0,0 +1,958 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include +#include "en_accel/ktls.h" +#include "en_accel/ktls_txrx.h" +#include "en_accel/ktls_utils.h" + +struct mlx5e_dump_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_data_seg data; +}; + +#define MLX5E_KTLS_DUMP_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB)) + +static u8 +mlx5e_ktls_dumps_num_wqes(struct mlx5e_params *params, unsigned int nfrags, + unsigned int sync_len) +{ + /* Given the MTU and sync_len, calculates an upper bound for the + * number of DUMP WQEs needed for the TX resync of a record. + */ + return nfrags + DIV_ROUND_UP(sync_len, MLX5E_SW2HW_MTU(params, params->sw_mtu)); +} + +u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + u16 num_dumps, stop_room = 0; + + if (!mlx5e_is_ktls_tx(mdev)) + return 0; + + num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); + + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); + stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS); + stop_room += 1; /* fence nop */ + + return stop_room; +} + +static void mlx5e_ktls_set_tisc(struct mlx5_core_dev *mdev, void *tisc) +{ + MLX5_SET(tisc, tisc, tls_en, 1); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); +} + +static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); + + return mlx5_core_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_create_tis_cb(struct mlx5_core_dev *mdev, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); +} + +static int mlx5e_ktls_destroy_tis_cb(struct mlx5_core_dev *mdev, u32 tisn, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); +} + +struct mlx5e_ktls_offload_context_tx { + /* fast path */ + u32 expected_seq; + u32 tisn; + bool ctx_post_pending; + /* control / resync */ + struct list_head list_node; /* member of the pool */ + union mlx5e_crypto_info crypto_info; + struct tls_offload_context_tx *tx_ctx; + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + struct mlx5_crypto_dek *dek; + u8 create_err : 1; +}; + +static void +mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, + struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + struct mlx5e_ktls_offload_context_tx **ctx = + __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); + + BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX); + + *ctx = priv_tx; +} + +static struct mlx5e_ktls_offload_context_tx * +mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx) +{ + struct mlx5e_ktls_offload_context_tx **ctx = + __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); + + return *ctx; +} + +/* struct for callback API management */ +struct mlx5e_async_ctx { + struct mlx5_async_work context; + struct mlx5_async_ctx *async_ctx; + struct mlx5e_ktls_offload_context_tx *priv_tx; + int err; + union { + u32 out_create[MLX5_ST_SZ_DW(create_tis_out)]; + u32 out_destroy[MLX5_ST_SZ_DW(destroy_tis_out)]; + }; +}; + +struct mlx5e_bulk_async_ctx { + struct mlx5_async_ctx async_ctx; + DECLARE_FLEX_ARRAY(struct mlx5e_async_ctx, arr); +}; + +static struct mlx5e_bulk_async_ctx *mlx5e_bulk_async_init(struct mlx5_core_dev *mdev, int n) +{ + struct mlx5e_bulk_async_ctx *bulk_async; + int sz; + int i; + + sz = struct_size(bulk_async, arr, n); + bulk_async = kvzalloc(sz, GFP_KERNEL); + if (!bulk_async) + return NULL; + + mlx5_cmd_init_async_ctx(mdev, &bulk_async->async_ctx); + + for (i = 0; i < n; i++) + bulk_async->arr[i].async_ctx = &bulk_async->async_ctx; + + return bulk_async; +} + +static void mlx5e_bulk_async_cleanup(struct mlx5e_bulk_async_ctx *bulk_async) +{ + mlx5_cmd_cleanup_async_ctx(&bulk_async->async_ctx); + kvfree(bulk_async); +} + +static void create_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + if (status) { + async->err = status; + priv_tx->create_err = 1; + return; + } + + priv_tx->tisn = MLX5_GET(create_tis_out, async->out_create, tisn); +} + +static void destroy_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + kfree(priv_tx); +} + +static struct mlx5e_ktls_offload_context_tx * +mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats, + struct mlx5e_async_ctx *async) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + int err; + + priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL); + if (!priv_tx) + return ERR_PTR(-ENOMEM); + + priv_tx->mdev = mdev; + priv_tx->sw_stats = sw_stats; + + if (!async) { + err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn); + if (err) + goto err_out; + } else { + async->priv_tx = priv_tx; + err = mlx5e_ktls_create_tis_cb(mdev, async->async_ctx, + async->out_create, sizeof(async->out_create), + create_tis_callback, &async->context); + if (err) + goto err_out; + } + + return priv_tx; + +err_out: + kfree(priv_tx); + return ERR_PTR(err); +} + +static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_async_ctx *async) +{ + if (priv_tx->create_err) { + kfree(priv_tx); + return; + } + async->priv_tx = priv_tx; + mlx5e_ktls_destroy_tis_cb(priv_tx->mdev, priv_tx->tisn, + async->async_ctx, + async->out_destroy, sizeof(async->out_destroy), + destroy_tis_callback, &async->context); +} + +static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev, + struct list_head *list, int size) +{ + struct mlx5e_ktls_offload_context_tx *obj, *n; + struct mlx5e_bulk_async_ctx *bulk_async; + int i; + + bulk_async = mlx5e_bulk_async_init(mdev, size); + if (!bulk_async) + return; + + i = 0; + list_for_each_entry_safe(obj, n, list, list_node) { + mlx5e_tls_priv_tx_cleanup(obj, &bulk_async->arr[i]); + i++; + } + + mlx5e_bulk_async_cleanup(bulk_async); +} + +/* Recycling pool API */ + +#define MLX5E_TLS_TX_POOL_BULK (16) +#define MLX5E_TLS_TX_POOL_HIGH (4 * 1024) +#define MLX5E_TLS_TX_POOL_LOW (MLX5E_TLS_TX_POOL_HIGH / 4) + +struct mlx5e_tls_tx_pool { + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + struct mutex lock; /* Protects access to the pool */ + struct list_head list; + size_t size; + + struct workqueue_struct *wq; + struct work_struct create_work; + struct work_struct destroy_work; +}; + +static void create_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, create_work); + struct mlx5e_ktls_offload_context_tx *obj; + struct mlx5e_bulk_async_ctx *bulk_async; + LIST_HEAD(local_list); + int i, j, err = 0; + + bulk_async = mlx5e_bulk_async_init(pool->mdev, MLX5E_TLS_TX_POOL_BULK); + if (!bulk_async) + return; + + for (i = 0; i < MLX5E_TLS_TX_POOL_BULK; i++) { + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, &bulk_async->arr[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + list_add(&obj->list_node, &local_list); + } + + for (j = 0; j < i; j++) { + struct mlx5e_async_ctx *async = &bulk_async->arr[j]; + + if (!err && async->err) + err = async->err; + } + atomic64_add(i, &pool->sw_stats->tx_tls_pool_alloc); + mlx5e_bulk_async_cleanup(bulk_async); + if (err) + goto err_out; + + mutex_lock(&pool->lock); + if (pool->size + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + goto err_out; + } + list_splice(&local_list, &pool->list); + pool->size += MLX5E_TLS_TX_POOL_BULK; + if (pool->size <= MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + return; + +err_out: + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, i); + atomic64_add(i, &pool->sw_stats->tx_tls_pool_free); +} + +static void destroy_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, destroy_work); + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + mutex_lock(&pool->lock); + if (pool->size < MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + return; + } + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + if (pool->size >= MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); +} + +static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev, + struct mlx5e_tls_sw_stats *sw_stats) +{ + struct mlx5e_tls_tx_pool *pool; + + BUILD_BUG_ON(MLX5E_TLS_TX_POOL_LOW + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH); + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->wq = create_singlethread_workqueue("mlx5e_tls_tx_pool"); + if (!pool->wq) + goto err_free; + + INIT_LIST_HEAD(&pool->list); + mutex_init(&pool->lock); + + INIT_WORK(&pool->create_work, create_work); + INIT_WORK(&pool->destroy_work, destroy_work); + + pool->mdev = mdev; + pool->sw_stats = sw_stats; + + return pool; + +err_free: + kvfree(pool); + return NULL; +} + +static void mlx5e_tls_tx_pool_list_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + while (pool->size > MLX5E_TLS_TX_POOL_BULK) { + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + } + if (pool->size) { + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &pool->list, pool->size); + atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free); + } +} + +static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + mlx5e_tls_tx_pool_list_cleanup(pool); + destroy_workqueue(pool->wq); + kvfree(pool); +} + +static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj) +{ + mutex_lock(&pool->lock); + list_add(&obj->list_node, &pool->list); + if (++pool->size == MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, &pool->destroy_work); + mutex_unlock(&pool->lock); +} + +static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool) +{ + struct mlx5e_ktls_offload_context_tx *obj; + + mutex_lock(&pool->lock); + if (unlikely(pool->size == 0)) { + /* pool is empty: + * - trigger the populating work, and + * - serve the current context via the regular blocking api. + */ + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, NULL); + if (!IS_ERR(obj)) + atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc); + return obj; + } + + obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx, + list_node); + list_del(&obj->list_node); + if (--pool->size == MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + return obj; +} + +/* End of pool API */ + +int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, + struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_tls_tx_pool *pool; + struct tls_context *tls_ctx; + struct mlx5_crypto_dek *dek; + struct mlx5e_priv *priv; + int err; + + tls_ctx = tls_get_ctx(sk); + priv = netdev_priv(netdev); + pool = priv->tls->tx_pool; + + priv_tx = pool_pop(pool); + if (IS_ERR(priv_tx)) + return PTR_ERR(priv_tx); + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + priv_tx->crypto_info.crypto_info_128 = + *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + break; + case TLS_CIPHER_AES_GCM_256: + priv_tx->crypto_info.crypto_info_256 = + *(struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + break; + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->cipher_type); + err = -EOPNOTSUPP; + goto err_pool_push; + } + + dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); + if (IS_ERR(dek)) { + err = PTR_ERR(dek); + goto err_pool_push; + } + + priv_tx->dek = dek; + priv_tx->expected_seq = start_offload_tcp_sn; + priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx); + + mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx); + + priv_tx->ctx_post_pending = true; + atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx); + + return 0; + +err_pool_push: + pool_push(pool, priv_tx); + return err; +} + +void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_tls_tx_pool *pool; + struct mlx5e_priv *priv; + + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + priv = netdev_priv(netdev); + pool = priv->tls->tx_pool; + + atomic64_inc(&priv_tx->sw_stats->tx_tls_del); + mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_tx->dek); + pool_push(pool, priv_tx); +} + +static void tx_fill_wi(struct mlx5e_txqsq *sq, + u16 pi, u8 num_wqebbs, u32 num_bytes, + struct page *page) +{ + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_bytes = num_bytes, + .resync_dump_frag_page = page, + }; +} + +static bool +mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + bool ret = priv_tx->ctx_post_pending; + + priv_tx->ctx_post_pending = false; + + return ret; +} + +static void +post_static_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_set_tls_static_params_wqe *wqe; + u16 pi, num_wqebbs; + + num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS; + pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs); + wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); + mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_tx->crypto_info, + priv_tx->tisn, + mlx5_crypto_dek_get_id(priv_tx->dek), + 0, fence, TLS_OFFLOAD_CTX_DIR_TX); + tx_fill_wi(sq, pi, num_wqebbs, 0, NULL); + sq->pc += num_wqebbs; +} + +static void +post_progress_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_set_tls_progress_params_wqe *wqe; + u16 pi, num_wqebbs; + + num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS; + pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs); + wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi); + mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, priv_tx->tisn, fence, 0, + TLS_OFFLOAD_CTX_DIR_TX); + tx_fill_wi(sq, pi, num_wqebbs, 0, NULL); + sq->pc += num_wqebbs; +} + +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, 0, NULL); + + mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); +} + +static void +mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool skip_static_post, bool fence_first_post) +{ + bool progress_fence = skip_static_post || !fence_first_post; + + if (!skip_static_post) + post_static_params(sq, priv_tx, fence_first_post); + + post_progress_params(sq, priv_tx, progress_fence); + tx_post_fence_nop(sq); +} + +struct tx_sync_info { + u64 rcd_sn; + u32 sync_len; + int nr_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +enum mlx5e_ktls_sync_retval { + MLX5E_KTLS_SYNC_DONE, + MLX5E_KTLS_SYNC_FAIL, + MLX5E_KTLS_SYNC_SKIP_NO_DATA, +}; + +static enum mlx5e_ktls_sync_retval +tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, + u32 tcp_seq, int datalen, struct tx_sync_info *info) +{ + struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; + enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE; + struct tls_record_info *record; + int remaining, i = 0; + unsigned long flags; + bool ends_before; + + spin_lock_irqsave(&tx_ctx->lock, flags); + record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); + + if (unlikely(!record)) { + ret = MLX5E_KTLS_SYNC_FAIL; + goto out; + } + + /* There are the following cases: + * 1. packet ends before start marker: bypass offload. + * 2. packet starts before start marker and ends after it: drop, + * not supported, breaks contract with kernel. + * 3. packet ends before tls record info starts: drop, + * this packet was already acknowledged and its record info + * was released. + */ + ends_before = before(tcp_seq + datalen - 1, tls_record_start_seq(record)); + + if (unlikely(tls_record_is_start_marker(record))) { + ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; + goto out; + } else if (ends_before) { + ret = MLX5E_KTLS_SYNC_FAIL; + goto out; + } + + info->sync_len = tcp_seq - tls_record_start_seq(record); + remaining = info->sync_len; + while (remaining > 0) { + skb_frag_t *frag = &record->frags[i]; + + get_page(skb_frag_page(frag)); + remaining -= skb_frag_size(frag); + info->frags[i++] = *frag; + } + /* reduce the part which will be sent with the original SKB */ + if (remaining < 0) + skb_frag_size_add(&info->frags[i - 1], remaining); + info->nr_frags = i; +out: + spin_unlock_irqrestore(&tx_ctx->lock, flags); + return ret; +} + +static void +tx_post_resync_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + u64 rcd_sn) +{ + __be64 rn_be = cpu_to_be64(rcd_sn); + bool skip_static_post; + u16 rec_seq_sz; + char *rec_seq; + + switch (priv_tx->crypto_info.crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info.crypto_info_128; + + rec_seq = info->rec_seq; + rec_seq_sz = sizeof(info->rec_seq); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = &priv_tx->crypto_info.crypto_info_256; + + rec_seq = info->rec_seq; + rec_seq_sz = sizeof(info->rec_seq); + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + priv_tx->crypto_info.crypto_info.cipher_type); + return; + } + + skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz); + if (!skip_static_post) + memcpy(rec_seq, &rn_be, rec_seq_sz); + + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true); +} + +static int +tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_dump_wqe *wqe; + dma_addr_t dma_addr = 0; + u16 ds_cnt; + int fsz; + u16 pi; + + BUILD_BUG_ON(MLX5E_KTLS_DUMP_WQEBBS != 1); + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wqe = MLX5E_TLS_FETCH_DUMP_WQE(sq, pi); + + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + + cseg = &wqe->ctrl; + dseg = &wqe->data; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + cseg->tis_tir_num = cpu_to_be32(tisn << 8); + + fsz = skb_frag_size(frag); + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + return -ENOMEM; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + + tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag)); + sq->pc += MLX5E_KTLS_DUMP_WQEBBS; + + return 0; +} + +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + struct mlx5e_sq_stats *stats; + struct mlx5e_sq_dma *dma; + + dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); + stats = sq->stats; + + mlx5e_tx_dma_unmap(sq->pdev, dma); + put_page(wi->resync_dump_frag_page); + stats->tls_dump_packets++; + stats->tls_dump_bytes += wi->num_bytes; +} + +static enum mlx5e_ktls_sync_retval +mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_txqsq *sq, + int datalen, + u32 seq) +{ + enum mlx5e_ktls_sync_retval ret; + struct tx_sync_info info = {}; + int i; + + ret = tx_sync_info_get(priv_tx, seq, datalen, &info); + if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) + /* We might get here with ret == FAIL if a retransmission + * reaches the driver after the relevant record is acked. + * It should be safe to drop the packet in this case + */ + return ret; + + tx_post_resync_params(sq, priv_tx, info.rcd_sn); + + for (i = 0; i < info.nr_frags; i++) { + unsigned int orig_fsz, frag_offset = 0, n = 0; + skb_frag_t *f = &info.frags[i]; + + orig_fsz = skb_frag_size(f); + + do { + unsigned int fsz; + + n++; + fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset); + skb_frag_size_set(f, fsz); + if (tx_post_resync_dump(sq, f, priv_tx->tisn)) { + page_ref_add(skb_frag_page(f), n - 1); + goto err_out; + } + + skb_frag_off_add(f, fsz); + frag_offset += fsz; + } while (frag_offset < orig_fsz); + + page_ref_add(skb_frag_page(f), n - 1); + } + + return MLX5E_KTLS_SYNC_DONE; + +err_out: + for (; i < info.nr_frags; i++) + /* The put_page() here undoes the page ref obtained in tx_sync_info_get(). + * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be + * released only upon their completions (or in mlx5e_free_txqsq_descs, + * if channel closes). + */ + put_page(skb_frag_page(&info.frags[i])); + + return MLX5E_KTLS_SYNC_FAIL; +} + +bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_accel_tx_tls_state *state) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_sq_stats *stats = sq->stats; + struct net_device *tls_netdev; + struct tls_context *tls_ctx; + int datalen; + u32 seq; + + datalen = skb->len - skb_tcp_all_headers(skb); + if (!datalen) + return true; + + mlx5e_tx_mpwqe_ensure_complete(sq); + + tls_ctx = tls_get_ctx(skb->sk); + tls_netdev = rcu_dereference_bh(tls_ctx->netdev); + /* Don't WARN on NULL: if tls_device_down is running in parallel, + * netdev might become NULL, even if tls_is_skb_tx_device_offloaded was + * true. Rather continue processing this packet. + */ + if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev)) + goto err_out; + + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); + + seq = ntohl(tcp_hdr(skb)->seq); + if (unlikely(priv_tx->expected_seq != seq)) { + enum mlx5e_ktls_sync_retval ret = + mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); + + stats->tls_ooo++; + + switch (ret) { + case MLX5E_KTLS_SYNC_DONE: + break; + case MLX5E_KTLS_SYNC_SKIP_NO_DATA: + stats->tls_skip_no_sync_data++; + if (likely(!skb->decrypted)) + goto out; + WARN_ON_ONCE(1); + goto err_out; + case MLX5E_KTLS_SYNC_FAIL: + stats->tls_drop_no_sync_data++; + goto err_out; + } + } + + priv_tx->expected_seq = seq + datalen; + + state->tls_tisn = priv_tx->tisn; + + stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; + stats->tls_encrypted_bytes += datalen; + +out: + return true; + +err_out: + dev_kfree_skb_any(skb); + return false; +} + +static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tls->debugfs.dfs_tx = debugfs_create_dir("tx", dfs_root); + + debugfs_create_size_t("pool_size", 0400, tls->debugfs.dfs_tx, + &tls->tx_pool->size); +} + +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5_crypto_dek_pool *dek_pool; + struct mlx5e_tls *tls = priv->tls; + int err; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + /* DEK pool could be used by either or both of TX and RX. But we have to + * put the creation here to avoid syndrome when doing devlink reload. + */ + dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); + if (IS_ERR(dek_pool)) + return PTR_ERR(dek_pool); + tls->dek_pool = dek_pool; + + if (!mlx5e_is_ktls_tx(priv->mdev)) + return 0; + + priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); + if (!priv->tls->tx_pool) { + err = -ENOMEM; + goto err_tx_pool_init; + } + + mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs); + + return 0; + +err_tx_pool_init: + mlx5_crypto_dek_pool_destroy(dek_pool); + return err; +} + +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_tx(priv->mdev)) + goto dek_pool_destroy; + + debugfs_remove_recursive(priv->tls->debugfs.dfs_tx); + priv->tls->debugfs.dfs_tx = NULL; + + mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); + priv->tls->tx_pool = NULL; + +dek_pool_destroy: + if (mlx5e_is_ktls_device(priv->mdev)) + mlx5_crypto_dek_pool_destroy(priv->tls->dek_pool); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c new file mode 100644 index 0000000000..570a912dd6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en_accel/ktls_txrx.h" +#include "en_accel/ktls_utils.h" + +enum { + MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2, +}; + +enum { + MLX5E_ENCRYPTION_STANDARD_TLS = 0x1, +}; + +#define EXTRACT_INFO_FIELDS do { \ + salt = info->salt; \ + rec_seq = info->rec_seq; \ + salt_sz = sizeof(info->salt); \ + rec_seq_sz = sizeof(info->rec_seq); \ +} while (0) + +static void +fill_static_params(struct mlx5_wqe_tls_static_params_seg *params, + union mlx5e_crypto_info *crypto_info, + u32 key_id, u32 resync_tcp_sn) +{ + char *initial_rn, *gcm_iv; + u16 salt_sz, rec_seq_sz; + char *salt, *rec_seq; + u8 tls_version; + u8 *ctx; + + ctx = params->ctx; + + switch (crypto_info->crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + &crypto_info->crypto_info_128; + + EXTRACT_INFO_FIELDS; + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + &crypto_info->crypto_info_256; + + EXTRACT_INFO_FIELDS; + break; + } + default: + WARN_ONCE(1, "Unsupported cipher type %u\n", + crypto_info->crypto_info.cipher_type); + return; + } + + gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); + initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number); + + memcpy(gcm_iv, salt, salt_sz); + memcpy(initial_rn, rec_seq, rec_seq_sz); + + tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2; + + MLX5_SET(tls_static_params, ctx, tls_version, tls_version); + MLX5_SET(tls_static_params, ctx, const_1, 1); + MLX5_SET(tls_static_params, ctx, const_2, 2); + MLX5_SET(tls_static_params, ctx, encryption_standard, + MLX5E_ENCRYPTION_STANDARD_TLS); + MLX5_SET(tls_static_params, ctx, resync_tcp_sn, resync_tcp_sn); + MLX5_SET(tls_static_params, ctx, dek_index, key_id); +} + +void +mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe, + u16 pc, u32 sqn, + union mlx5e_crypto_info *crypto_info, + u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn, + bool fence, enum tls_offload_ctx_dir direction) +{ + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + u8 opmod = direction == TLS_OFFLOAD_CTX_DIR_TX ? + MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS : + MLX5_OPC_MOD_TLS_TIR_STATIC_PARAMS; + +#define STATIC_PARAMS_DS_CNT DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR | (opmod << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + STATIC_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + cseg->tis_tir_num = cpu_to_be32(tis_tir_num << 8); + + ucseg->flags = MLX5_UMR_INLINE; + ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); + + fill_static_params(&wqe->params, crypto_info, key_id, resync_tcp_sn); +} + +static void +fill_progress_params(struct mlx5_wqe_tls_progress_params_seg *params, u32 tis_tir_num, + u32 next_record_tcp_sn) +{ + u8 *ctx = params->ctx; + + params->tis_tir_num = cpu_to_be32(tis_tir_num); + + MLX5_SET(tls_progress_params, ctx, next_record_tcp_sn, + next_record_tcp_sn); + MLX5_SET(tls_progress_params, ctx, record_tracker_state, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START); + MLX5_SET(tls_progress_params, ctx, auth_state, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD); +} + +void +mlx5e_ktls_build_progress_params(struct mlx5e_set_tls_progress_params_wqe *wqe, + u16 pc, u32 sqn, + u32 tis_tir_num, bool fence, + u32 next_record_tcp_sn, + enum tls_offload_ctx_dir direction) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + u8 opmod = direction == TLS_OFFLOAD_CTX_DIR_TX ? + MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS : + MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS; + +#define PROGRESS_PARAMS_DS_CNT DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = + cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV | (opmod << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + PROGRESS_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + fill_progress_params(&wqe->params, tis_tir_num, next_record_tcp_sn); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h new file mode 100644 index 0000000000..f87b65c560 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_KTLS_TXRX_H__ +#define __MLX5E_KTLS_TXRX_H__ + +#ifdef CONFIG_MLX5_EN_TLS + +#include +#include "en.h" +#include "en/txrx.h" + +struct mlx5e_accel_tx_tls_state { + u32 tls_tisn; +}; + +u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); + +bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_accel_tx_tls_state *state); +void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, + struct mlx5_cqe64 *cqe, u32 *cqe_bcnt); + +void mlx5e_ktls_handle_ctx_completion(struct mlx5e_icosq_wqe_info *wi); +void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, + struct mlx5e_icosq *sq); + +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc); +static inline bool +mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + if (unlikely(wi->resync_dump_frag_page)) { + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc); + return true; + } + return false; +} + +bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget); + +static inline bool +mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) +{ + return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state); +} + +static inline void +mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, + struct mlx5e_accel_tx_tls_state *state) +{ + cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); +} +#else +static inline bool +mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + return false; +} + +static inline bool +mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) +{ + return false; +} + +static inline bool +mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) +{ + return false; +} + +static inline u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return 0; +} + +static inline void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe, + u32 *cqe_bcnt) +{ +} +#endif /* CONFIG_MLX5_EN_TLS */ + +#endif /* __MLX5E_TLS_TXRX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h new file mode 100644 index 0000000000..3d79cd3798 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_KTLS_UTILS_H__ +#define __MLX5E_KTLS_UTILS_H__ + +#include +#include "en.h" + +enum { + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2, +}; + +enum { + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 1, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 2, +}; + +int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, + struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn); +void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx); +int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, + struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn); +void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx); +void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, u32 seq, u8 *rcd_sn); + +union mlx5e_crypto_info { + struct tls_crypto_info crypto_info; + struct tls12_crypto_info_aes_gcm_128 crypto_info_128; + struct tls12_crypto_info_aes_gcm_256 crypto_info_256; +}; + +struct mlx5e_set_tls_static_params_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + struct mlx5_wqe_tls_static_params_seg params; +}; + +struct mlx5e_set_tls_progress_params_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_tls_progress_params_seg params; +}; + +struct mlx5e_get_tls_progress_params_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_seg_get_psv psv; +}; + +#define MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_set_tls_static_params_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_set_tls_progress_params_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_KTLS_GET_PROGRESS_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_get_tls_progress_params_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi) \ + ((struct mlx5e_set_tls_static_params_wqe *)\ + mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_set_tls_static_params_wqe))) + +#define MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi) \ + ((struct mlx5e_set_tls_progress_params_wqe *)\ + mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_set_tls_progress_params_wqe))) + +#define MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi) \ + ((struct mlx5e_get_tls_progress_params_wqe *)\ + mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_get_tls_progress_params_wqe))) + +#define MLX5E_TLS_FETCH_DUMP_WQE(sq, pi) \ + ((struct mlx5e_dump_wqe *)\ + mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_dump_wqe))) + +void +mlx5e_ktls_build_static_params(struct mlx5e_set_tls_static_params_wqe *wqe, + u16 pc, u32 sqn, + union mlx5e_crypto_info *crypto_info, + u32 tis_tir_num, u32 key_id, u32 resync_tcp_sn, + bool fence, enum tls_offload_ctx_dir direction); +void +mlx5e_ktls_build_progress_params(struct mlx5e_set_tls_progress_params_wqe *wqe, + u16 pc, u32 sqn, + u32 tis_tir_num, bool fence, + u32 next_record_tcp_sn, + enum tls_offload_ctx_dir direction); + +#endif /* __MLX5E_TLS_UTILS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c new file mode 100644 index 0000000000..d4ebd87431 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -0,0 +1,1775 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include +#include +#include + +#include "en.h" +#include "lib/aso.h" +#include "lib/crypto.h" +#include "en_accel/macsec.h" + +#define MLX5_MACSEC_EPN_SCOPE_MID 0x80000000L +#define MLX5E_MACSEC_ASO_CTX_SZ MLX5_ST_SZ_BYTES(macsec_aso) + +enum mlx5_macsec_aso_event_arm { + MLX5E_ASO_EPN_ARM = BIT(0), +}; + +enum { + MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, +}; + +struct mlx5e_macsec_handle { + struct mlx5e_macsec *macsec; + u32 obj_id; + u8 idx; +}; + +enum { + MLX5_MACSEC_EPN, +}; + +struct mlx5e_macsec_aso_out { + u8 event_arm; + u32 mode_param; +}; + +struct mlx5e_macsec_aso_in { + u8 mode; + u32 obj_id; +}; + +struct mlx5e_macsec_epn_state { + u32 epn_msb; + u8 epn_enabled; + u8 overlap; +}; + +struct mlx5e_macsec_async_work { + struct mlx5e_macsec *macsec; + struct mlx5_core_dev *mdev; + struct work_struct work; + u32 obj_id; +}; + +struct mlx5e_macsec_sa { + bool active; + u8 assoc_num; + u32 macsec_obj_id; + u32 enc_key_id; + u32 next_pn; + sci_t sci; + ssci_t ssci; + salt_t salt; + + union mlx5_macsec_rule *macsec_rule; + struct rcu_head rcu_head; + struct mlx5e_macsec_epn_state epn_state; +}; + +struct mlx5e_macsec_rx_sc; +struct mlx5e_macsec_rx_sc_xarray_element { + u32 fs_id; + struct mlx5e_macsec_rx_sc *rx_sc; +}; + +struct mlx5e_macsec_rx_sc { + bool active; + sci_t sci; + struct mlx5e_macsec_sa *rx_sa[MACSEC_NUM_AN]; + struct list_head rx_sc_list_element; + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + struct metadata_dst *md_dst; + struct rcu_head rcu_head; +}; + +struct mlx5e_macsec_umr { + u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(macsec_aso)]; + dma_addr_t dma_addr; + u32 mkey; +}; + +struct mlx5e_macsec_aso { + /* ASO */ + struct mlx5_aso *maso; + /* Protects macsec ASO */ + struct mutex aso_lock; + /* UMR */ + struct mlx5e_macsec_umr *umr; + + u32 pdn; +}; + +struct mlx5e_macsec_device { + const struct net_device *netdev; + struct mlx5e_macsec_sa *tx_sa[MACSEC_NUM_AN]; + struct list_head macsec_rx_sc_list_head; + unsigned char *dev_addr; + struct list_head macsec_device_list_element; +}; + +struct mlx5e_macsec { + struct list_head macsec_device_list_head; + int num_of_devices; + struct mutex lock; /* Protects mlx5e_macsec internal contexts */ + + /* Rx fs_id -> rx_sc mapping */ + struct xarray sc_xarray; + + struct mlx5_core_dev *mdev; + + /* ASO */ + struct mlx5e_macsec_aso aso; + + struct notifier_block nb; + struct workqueue_struct *wq; +}; + +struct mlx5_macsec_obj_attrs { + u32 aso_pdn; + u32 next_pn; + __be64 sci; + u32 enc_key_id; + bool encrypt; + struct mlx5e_macsec_epn_state epn_state; + salt_t salt; + __be32 ssci; + bool replay_protect; + u32 replay_window; +}; + +struct mlx5_aso_ctrl_param { + u8 data_mask_mode; + u8 condition_0_operand; + u8 condition_1_operand; + u8 condition_0_offset; + u8 condition_1_offset; + u8 data_offset; + u8 condition_operand; + u32 condition_0_data; + u32 condition_0_mask; + u32 condition_1_data; + u32 condition_1_mask; + u64 bitwise_data; + u64 data_mask; +}; + +static int mlx5e_macsec_aso_reg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso) +{ + struct mlx5e_macsec_umr *umr; + struct device *dma_device; + dma_addr_t dma_addr; + int err; + + umr = kzalloc(sizeof(*umr), GFP_KERNEL); + if (!umr) { + err = -ENOMEM; + return err; + } + + dma_device = mlx5_core_dma_dev(mdev); + dma_addr = dma_map_single(dma_device, umr->ctx, sizeof(umr->ctx), DMA_BIDIRECTIONAL); + err = dma_mapping_error(dma_device, dma_addr); + if (err) { + mlx5_core_err(mdev, "Can't map dma device, err=%d\n", err); + goto out_dma; + } + + err = mlx5e_create_mkey(mdev, aso->pdn, &umr->mkey); + if (err) { + mlx5_core_err(mdev, "Can't create mkey, err=%d\n", err); + goto out_mkey; + } + + umr->dma_addr = dma_addr; + + aso->umr = umr; + + return 0; + +out_mkey: + dma_unmap_single(dma_device, dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL); +out_dma: + kfree(umr); + return err; +} + +static void mlx5e_macsec_aso_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5e_macsec_aso *aso) +{ + struct mlx5e_macsec_umr *umr = aso->umr; + + mlx5_core_destroy_mkey(mdev, umr->mkey); + dma_unmap_single(&mdev->pdev->dev, umr->dma_addr, sizeof(umr->ctx), DMA_BIDIRECTIONAL); + kfree(umr); +} + +static int macsec_set_replay_protection(struct mlx5_macsec_obj_attrs *attrs, void *aso_ctx) +{ + u8 window_sz; + + if (!attrs->replay_protect) + return 0; + + switch (attrs->replay_window) { + case 256: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_256BIT; + break; + case 128: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_128BIT; + break; + case 64: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_64BIT; + break; + case 32: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_32BIT; + break; + default: + return -EINVAL; + } + MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz); + MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_REPLAY_PROTECTION); + + return 0; +} + +static int mlx5e_macsec_create_object(struct mlx5_core_dev *mdev, + struct mlx5_macsec_obj_attrs *attrs, + bool is_tx, + u32 *macsec_obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_macsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *aso_ctx; + void *obj; + int err; + + obj = MLX5_ADDR_OF(create_macsec_obj_in, in, macsec_object); + aso_ctx = MLX5_ADDR_OF(macsec_offload_obj, obj, macsec_aso); + + MLX5_SET(macsec_offload_obj, obj, confidentiality_en, attrs->encrypt); + MLX5_SET(macsec_offload_obj, obj, dekn, attrs->enc_key_id); + MLX5_SET(macsec_offload_obj, obj, aso_return_reg, MLX5_MACSEC_ASO_REG_C_4_5); + MLX5_SET(macsec_offload_obj, obj, macsec_aso_access_pd, attrs->aso_pdn); + MLX5_SET(macsec_aso, aso_ctx, mode_parameter, attrs->next_pn); + + /* Epn */ + if (attrs->epn_state.epn_enabled) { + void *salt_p; + int i; + + MLX5_SET(macsec_aso, aso_ctx, epn_event_arm, 1); + MLX5_SET(macsec_offload_obj, obj, epn_en, 1); + MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb); + MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap); + MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)attrs->ssci); + salt_p = MLX5_ADDR_OF(macsec_offload_obj, obj, salt); + for (i = 0; i < 3 ; i++) + memcpy((u32 *)salt_p + i, &attrs->salt.bytes[4 * (2 - i)], 4); + } else { + MLX5_SET64(macsec_offload_obj, obj, sci, (__force u64)(attrs->sci)); + } + + MLX5_SET(macsec_aso, aso_ctx, valid, 0x1); + if (is_tx) { + MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_INC_SN); + } else { + err = macsec_set_replay_protection(attrs, aso_ctx); + if (err) + return err; + } + + /* general object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, + "MACsec offload: Failed to create MACsec object (err = %d)\n", + err); + return err; + } + + *macsec_obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_obj_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, + bool is_tx, struct net_device *netdev, u32 fs_id) +{ + int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + if (!sa->macsec_rule) + return; + + mlx5_macsec_fs_del_rule(macsec->mdev->macsec_fs, sa->macsec_rule, action, netdev, + fs_id); + mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); + sa->macsec_rule = NULL; +} + +static int mlx5e_macsec_init_sa(struct macsec_context *ctx, + struct mlx5e_macsec_sa *sa, + bool encrypt, bool is_tx, u32 *fs_id) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5_macsec_rule_attrs rule_attrs; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_macsec_obj_attrs obj_attrs; + union mlx5_macsec_rule *macsec_rule; + int err; + + obj_attrs.next_pn = sa->next_pn; + obj_attrs.sci = cpu_to_be64((__force u64)sa->sci); + obj_attrs.enc_key_id = sa->enc_key_id; + obj_attrs.encrypt = encrypt; + obj_attrs.aso_pdn = macsec->aso.pdn; + obj_attrs.epn_state = sa->epn_state; + + if (sa->epn_state.epn_enabled) { + obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci); + memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt)); + } + + obj_attrs.replay_window = ctx->secy->replay_window; + obj_attrs.replay_protect = ctx->secy->replay_protect; + + err = mlx5e_macsec_create_object(mdev, &obj_attrs, is_tx, &sa->macsec_obj_id); + if (err) + return err; + + rule_attrs.macsec_obj_id = sa->macsec_obj_id; + rule_attrs.sci = sa->sci; + rule_attrs.assoc_num = sa->assoc_num; + rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + macsec_rule = mlx5_macsec_fs_add_rule(mdev->macsec_fs, ctx, &rule_attrs, fs_id); + if (!macsec_rule) { + err = -ENOMEM; + goto destroy_macsec_object; + } + + sa->macsec_rule = macsec_rule; + + return 0; + +destroy_macsec_object: + mlx5e_macsec_destroy_object(mdev, sa->macsec_obj_id); + + return err; +} + +static struct mlx5e_macsec_rx_sc * +mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) +{ + struct mlx5e_macsec_rx_sc *iter; + + list_for_each_entry_rcu(iter, list, rx_sc_list_element) { + if (iter->sci == sci) + return iter; + } + + return NULL; +} + +static int macsec_rx_sa_active_update(struct macsec_context *ctx, + struct mlx5e_macsec_sa *rx_sa, + bool active, u32 *fs_id) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; + int err = 0; + + if (rx_sa->active == active) + return 0; + + rx_sa->active = active; + if (!active) { + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, *fs_id); + return 0; + } + + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, fs_id); + if (err) + rx_sa->active = false; + + return err; +} + +static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) +{ + const struct net_device *netdev = ctx->netdev; + const struct macsec_secy *secy = ctx->secy; + + if (secy->validate_frames != MACSEC_VALIDATE_STRICT) { + netdev_err(netdev, + "MACsec offload is supported only when validate_frame is in strict mode\n"); + return false; + } + + if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) { + netdev_err(netdev, "MACsec offload is supported only when icv_len is %d\n", + MACSEC_DEFAULT_ICV_LEN); + return false; + } + + if (!secy->protect_frames) { + netdev_err(netdev, + "MACsec offload is supported only when protect_frames is set\n"); + return false; + } + + if (!ctx->secy->tx_sc.encrypt) { + netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n"); + return false; + } + + return true; +} + +static struct mlx5e_macsec_device * +mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec, + const struct macsec_context *ctx) +{ + struct mlx5e_macsec_device *iter; + const struct list_head *list; + + list = &macsec->macsec_device_list_head; + list_for_each_entry_rcu(iter, list, macsec_device_list_element) { + if (iter->netdev == ctx->secy->netdev) + return iter; + } + + return NULL; +} + +static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key, + const pn_t *next_pn_halves, ssci_t ssci) +{ + struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state; + + sa->ssci = ssci; + sa->salt = key->salt; + epn_state->epn_enabled = 1; + epn_state->epn_msb = next_pn_halves->upper; + epn_state->overlap = next_pn_halves->lower < MLX5_MACSEC_EPN_SCOPE_MID ? 0 : 1; +} + +static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa; + const struct macsec_secy *secy = ctx->secy; + struct mlx5e_macsec_device *macsec_device; + struct mlx5_core_dev *mdev = priv->mdev; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EEXIST; + goto out; + } + + if (macsec_device->tx_sa[assoc_num]) { + netdev_err(ctx->netdev, "MACsec offload tx_sa: %d already exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + tx_sa = kzalloc(sizeof(*tx_sa), GFP_KERNEL); + if (!tx_sa) { + err = -ENOMEM; + goto out; + } + + tx_sa->active = ctx_tx_sa->active; + tx_sa->next_pn = ctx_tx_sa->next_pn_halves.lower; + tx_sa->sci = secy->sci; + tx_sa->assoc_num = assoc_num; + + if (secy->xpn) + update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves, + ctx_tx_sa->ssci); + + err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len, + MLX5_ACCEL_OBJ_MACSEC_KEY, + &tx_sa->enc_key_id); + if (err) + goto destroy_sa; + + macsec_device->tx_sa[assoc_num] = tx_sa; + if (!secy->operational || + assoc_num != tx_sc->encoding_sa || + !tx_sa->active) + goto out; + + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + if (err) + goto destroy_encryption_key; + + mutex_unlock(&macsec->lock); + + return 0; + +destroy_encryption_key: + macsec_device->tx_sa[assoc_num] = NULL; + mlx5_destroy_encryption_key(mdev, tx_sa->enc_key_id); +destroy_sa: + kfree(tx_sa); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + const struct macsec_tx_sa *ctx_tx_sa = ctx->sa.tx_sa; + struct mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + struct net_device *netdev; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + netdev = ctx->netdev; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + tx_sa = macsec_device->tx_sa[assoc_num]; + if (!tx_sa) { + netdev_err(netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + if (ctx->sa.update_pn) { + netdev_err(netdev, "MACsec offload: update TX sa %d PN isn't supported\n", + assoc_num); + err = -EINVAL; + goto out; + } + + if (tx_sa->active == ctx_tx_sa->active) + goto out; + + tx_sa->active = ctx_tx_sa->active; + if (tx_sa->assoc_num != tx_sc->encoding_sa) + goto out; + + if (ctx_tx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + if (err) + goto out; + } else { + if (!tx_sa->macsec_rule) { + err = -EINVAL; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + } +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_txsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + tx_sa = macsec_device->tx_sa[assoc_num]; + if (!tx_sa) { + netdev_err(ctx->netdev, "MACsec offload: TX sa 0x%x doesn't exist\n", assoc_num); + err = -EEXIST; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id); + kfree_rcu_mightsleep(tx_sa); + macsec_device->tx_sa[assoc_num] = NULL; + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct list_head *rx_sc_list; + struct mlx5e_macsec *macsec; + int err = 0; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + rx_sc_list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(rx_sc_list, ctx_rx_sc->sci); + if (rx_sc) { + netdev_err(ctx->netdev, "MACsec offload: rx_sc (sci %lld) already exists\n", + ctx_rx_sc->sci); + err = -EEXIST; + goto out; + } + + rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL); + if (!rx_sc) { + err = -ENOMEM; + goto out; + } + + sc_xarray_element = kzalloc(sizeof(*sc_xarray_element), GFP_KERNEL); + if (!sc_xarray_element) { + err = -ENOMEM; + goto destroy_rx_sc; + } + + sc_xarray_element->rx_sc = rx_sc; + err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element, + XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL); + if (err) { + if (err == -EBUSY) + netdev_err(ctx->netdev, + "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n", + MLX5_MACEC_RX_FS_ID_MAX); + goto destroy_sc_xarray_elemenet; + } + + rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); + if (!rx_sc->md_dst) { + err = -ENOMEM; + goto erase_xa_alloc; + } + + rx_sc->sci = ctx_rx_sc->sci; + rx_sc->active = ctx_rx_sc->active; + list_add_rcu(&rx_sc->rx_sc_list_element, rx_sc_list); + + rx_sc->sc_xarray_element = sc_xarray_element; + rx_sc->md_dst->u.macsec_info.sci = rx_sc->sci; + mutex_unlock(&macsec->lock); + + return 0; + +erase_xa_alloc: + xa_erase(&macsec->sc_xarray, sc_xarray_element->fs_id); +destroy_sc_xarray_elemenet: + kfree(sc_xarray_element); +destroy_rx_sc: + kfree(rx_sc); + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct macsec_rx_sc *ctx_rx_sc = ctx->rx_sc; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int i; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx_rx_sc->sci); + if (!rx_sc) { + err = -EINVAL; + goto out; + } + + if (rx_sc->active == ctx_rx_sc->active) + goto out; + + rx_sc->active = ctx_rx_sc->active; + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active, + &rx_sc->sc_xarray_element->fs_id); + if (err) + goto out; + } + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc, + struct net_device *netdev) +{ + struct mlx5e_macsec_sa *rx_sa; + int i; + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, netdev, + rx_sc->sc_xarray_element->fs_id); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + + /* At this point the relevant MACsec offload Rx rule already removed at + * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current + * Rx related data propagating using xa_erase which uses rcu to sync, + * once fs_id is erased then this rx_sc is hidden from datapath. + */ + list_del_rcu(&rx_sc->rx_sc_list_element); + xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); + metadata_dst_free(rx_sc->md_dst); + kfree(rx_sc->sc_xarray_element); + kfree_rcu_mightsleep(rx_sc); +} + +static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, ctx->rx_sc->sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + macsec_del_rxsc_ctx(macsec, rx_sc, ctx->secy->netdev); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa; + struct mlx5e_macsec_device *macsec_device; + struct mlx5_core_dev *mdev = priv->mdev; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_rx_sc *rx_sc; + sci_t sci = ctx_rx_sa->sc->sci; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + if (rx_sc->rx_sa[assoc_num]) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d already exist\n", + sci, assoc_num); + err = -EEXIST; + goto out; + } + + rx_sa = kzalloc(sizeof(*rx_sa), GFP_KERNEL); + if (!rx_sa) { + err = -ENOMEM; + goto out; + } + + rx_sa->active = ctx_rx_sa->active; + rx_sa->next_pn = ctx_rx_sa->next_pn; + rx_sa->sci = sci; + rx_sa->assoc_num = assoc_num; + + if (ctx->secy->xpn) + update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves, + ctx_rx_sa->ssci); + + err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len, + MLX5_ACCEL_OBJ_MACSEC_KEY, + &rx_sa->enc_key_id); + if (err) + goto destroy_sa; + + rx_sc->rx_sa[assoc_num] = rx_sa; + if (!rx_sa->active) + goto out; + + //TODO - add support for both authentication and encryption flows + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, &rx_sc->sc_xarray_element->fs_id); + if (err) + goto destroy_encryption_key; + + goto out; + +destroy_encryption_key: + rx_sc->rx_sa[assoc_num] = NULL; + mlx5_destroy_encryption_key(mdev, rx_sa->enc_key_id); +destroy_sa: + kfree(rx_sa); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct macsec_rx_sa *ctx_rx_sa = ctx->sa.rx_sa; + struct mlx5e_macsec_device *macsec_device; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_rx_sc *rx_sc; + sci_t sci = ctx_rx_sa->sc->sci; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + rx_sa = rx_sc->rx_sa[assoc_num]; + if (!rx_sa) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n", + sci, assoc_num); + err = -EINVAL; + goto out; + } + + if (ctx->sa.update_pn) { + netdev_err(ctx->netdev, + "MACsec offload update RX sa %d PN isn't supported\n", + assoc_num); + err = -EINVAL; + goto out; + } + + err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active, + &rx_sc->sc_xarray_element->fs_id); +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + sci_t sci = ctx->sa.rx_sa->sc->sci; + struct mlx5e_macsec_rx_sc *rx_sc; + u8 assoc_num = ctx->sa.assoc_num; + struct mlx5e_macsec_sa *rx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + list = &macsec_device->macsec_rx_sc_list_head; + rx_sc = mlx5e_macsec_get_rx_sc_from_sc_list(list, sci); + if (!rx_sc) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld doesn't exist\n", + ctx->sa.rx_sa->sc->sci); + err = -EINVAL; + goto out; + } + + rx_sa = rx_sc->rx_sa[assoc_num]; + if (!rx_sa) { + netdev_err(ctx->netdev, + "MACsec offload rx_sc sci %lld rx_sa %d doesn't exist\n", + sci, assoc_num); + err = -EINVAL; + goto out; + } + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + kfree(rx_sa); + rx_sc->rx_sa[assoc_num] = NULL; + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_add_secy(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct net_device *dev = ctx->secy->netdev; + const struct net_device *netdev = ctx->netdev; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec *macsec; + int err = 0; + + if (!mlx5e_macsec_secy_features_validate(ctx)) + return -EINVAL; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + if (mlx5e_macsec_get_macsec_device_context(macsec, ctx)) { + netdev_err(netdev, "MACsec offload: MACsec net_device already exist\n"); + goto out; + } + + if (macsec->num_of_devices >= MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES) { + netdev_err(netdev, "Currently, only %d MACsec offload devices can be set\n", + MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES); + err = -EBUSY; + goto out; + } + + macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL); + if (!macsec_device) { + err = -ENOMEM; + goto out; + } + + macsec_device->dev_addr = kmemdup(dev->dev_addr, dev->addr_len, GFP_KERNEL); + if (!macsec_device->dev_addr) { + kfree(macsec_device); + err = -ENOMEM; + goto out; + } + + macsec_device->netdev = dev; + + INIT_LIST_HEAD_RCU(&macsec_device->macsec_rx_sc_list_head); + list_add_rcu(&macsec_device->macsec_device_list_element, &macsec->macsec_device_list_head); + + ++macsec->num_of_devices; +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int macsec_upd_secy_hw_address(struct macsec_context *ctx, + struct mlx5e_macsec_device *macsec_device) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct net_device *dev = ctx->secy->netdev; + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5e_macsec_rx_sc *rx_sc, *tmp; + struct mlx5e_macsec_sa *rx_sa; + struct list_head *list; + int i, err = 0; + + + list = &macsec_device->macsec_rx_sc_list_head; + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa || !rx_sa->macsec_rule) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); + } + } + + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + if (rx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, + &rx_sc->sc_xarray_element->fs_id); + if (err) + goto out; + } + } + } + + memcpy(macsec_device->dev_addr, dev->dev_addr, dev->addr_len); +out: + return err; +} + +/* this function is called from 2 macsec ops functions: + * macsec_set_mac_address – MAC address was changed, therefore we need to destroy + * and create new Tx contexts(macsec object + steering). + * macsec_changelink – in this case the tx SC or SecY may be changed, therefore need to + * destroy Tx and Rx contexts(macsec object + steering) + */ +static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; + const struct net_device *dev = ctx->secy->netdev; + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + int i, err = 0; + + if (!mlx5e_macsec_secy_features_validate(ctx)) + return -EINVAL; + + mutex_lock(&priv->macsec->lock); + + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + goto out; + } + + /* if the dev_addr hasn't change, it mean the callback is from macsec_changelink */ + if (!memcmp(macsec_device->dev_addr, dev->dev_addr, dev->addr_len)) { + err = macsec_upd_secy_hw_address(ctx, macsec_device); + if (err) + goto out; + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) { + err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + if (err) + goto out; + } + } + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static int mlx5e_macsec_del_secy(struct macsec_context *ctx) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5e_macsec_device *macsec_device; + struct mlx5e_macsec_rx_sc *rx_sc, *tmp; + struct mlx5e_macsec_sa *tx_sa; + struct mlx5e_macsec *macsec; + struct list_head *list; + int err = 0; + int i; + + mutex_lock(&priv->macsec->lock); + macsec = priv->macsec; + macsec_device = mlx5e_macsec_get_macsec_device_context(macsec, ctx); + if (!macsec_device) { + netdev_err(ctx->netdev, "MACsec offload: Failed to find device context\n"); + err = -EINVAL; + + goto out; + } + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + tx_sa = macsec_device->tx_sa[i]; + if (!tx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5_destroy_encryption_key(macsec->mdev, tx_sa->enc_key_id); + kfree(tx_sa); + macsec_device->tx_sa[i] = NULL; + } + + list = &macsec_device->macsec_rx_sc_list_head; + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) + macsec_del_rxsc_ctx(macsec, rx_sc, ctx->secy->netdev); + + kfree(macsec_device->dev_addr); + macsec_device->dev_addr = NULL; + + list_del_rcu(&macsec_device->macsec_device_list_element); + --macsec->num_of_devices; + kfree(macsec_device); + +out: + mutex_unlock(&macsec->lock); + + return err; +} + +static void macsec_build_accel_attrs(struct mlx5e_macsec_sa *sa, + struct mlx5_macsec_obj_attrs *attrs) +{ + attrs->epn_state.epn_msb = sa->epn_state.epn_msb; + attrs->epn_state.overlap = sa->epn_state.overlap; +} + +static void macsec_aso_build_wqe_ctrl_seg(struct mlx5e_macsec_aso *macsec_aso, + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl, + struct mlx5_aso_ctrl_param *param) +{ + struct mlx5e_macsec_umr *umr = macsec_aso->umr; + + memset(aso_ctrl, 0, sizeof(*aso_ctrl)); + aso_ctrl->va_l = cpu_to_be32(umr->dma_addr | ASO_CTRL_READ_EN); + aso_ctrl->va_h = cpu_to_be32((u64)umr->dma_addr >> 32); + aso_ctrl->l_key = cpu_to_be32(umr->mkey); + + if (!param) + return; + + aso_ctrl->data_mask_mode = param->data_mask_mode << 6; + aso_ctrl->condition_1_0_operand = param->condition_1_operand | + param->condition_0_operand << 4; + aso_ctrl->condition_1_0_offset = param->condition_1_offset | + param->condition_0_offset << 4; + aso_ctrl->data_offset_condition_operand = param->data_offset | + param->condition_operand << 6; + aso_ctrl->condition_0_data = cpu_to_be32(param->condition_0_data); + aso_ctrl->condition_0_mask = cpu_to_be32(param->condition_0_mask); + aso_ctrl->condition_1_data = cpu_to_be32(param->condition_1_data); + aso_ctrl->condition_1_mask = cpu_to_be32(param->condition_1_mask); + aso_ctrl->bitwise_data = cpu_to_be64(param->bitwise_data); + aso_ctrl->data_mask = cpu_to_be64(param->data_mask); +} + +static int mlx5e_macsec_modify_obj(struct mlx5_core_dev *mdev, struct mlx5_macsec_obj_attrs *attrs, + u32 macsec_id) +{ + u32 in[MLX5_ST_SZ_DW(modify_macsec_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(query_macsec_obj_out)]; + u64 modify_field_select = 0; + void *obj; + int err; + + /* General object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_MACSEC); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, macsec_id); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) { + mlx5_core_err(mdev, "Query MACsec object failed (Object id %d), err = %d\n", + macsec_id, err); + return err; + } + + obj = MLX5_ADDR_OF(query_macsec_obj_out, out, macsec_object); + modify_field_select = MLX5_GET64(macsec_offload_obj, obj, modify_field_select); + + /* EPN */ + if (!(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP) || + !(modify_field_select & MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB)) { + mlx5_core_dbg(mdev, "MACsec object field is not modifiable (Object id %d)\n", + macsec_id); + return -EOPNOTSUPP; + } + + obj = MLX5_ADDR_OF(modify_macsec_obj_in, in, macsec_object); + MLX5_SET64(macsec_offload_obj, obj, modify_field_select, + MLX5_MODIFY_MACSEC_BITMASK_EPN_OVERLAP | MLX5_MODIFY_MACSEC_BITMASK_EPN_MSB); + MLX5_SET(macsec_offload_obj, obj, epn_msb, attrs->epn_state.epn_msb); + MLX5_SET(macsec_offload_obj, obj, epn_overlap, attrs->epn_state.overlap); + + /* General object fields set */ + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static void macsec_aso_build_ctrl(struct mlx5e_macsec_aso *aso, + struct mlx5_wqe_aso_ctrl_seg *aso_ctrl, + struct mlx5e_macsec_aso_in *in) +{ + struct mlx5_aso_ctrl_param param = {}; + + param.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT; + param.condition_0_operand = MLX5_ASO_ALWAYS_TRUE; + param.condition_1_operand = MLX5_ASO_ALWAYS_TRUE; + if (in->mode == MLX5_MACSEC_EPN) { + param.data_offset = MLX5_MACSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + param.bitwise_data = BIT_ULL(54); + param.data_mask = param.bitwise_data; + } + macsec_aso_build_wqe_ctrl_seg(aso, aso_ctrl, ¶m); +} + +static int macsec_aso_set_arm_event(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec, + struct mlx5e_macsec_aso_in *in) +{ + struct mlx5e_macsec_aso *aso; + struct mlx5_aso_wqe *aso_wqe; + struct mlx5_aso *maso; + int err; + + aso = &macsec->aso; + maso = aso->maso; + + mutex_lock(&aso->aso_lock); + aso_wqe = mlx5_aso_get_wqe(maso); + mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC); + macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in); + mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); + err = mlx5_aso_poll_cq(maso, false); + mutex_unlock(&aso->aso_lock); + + return err; +} + +static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *macsec, + struct mlx5e_macsec_aso_in *in, struct mlx5e_macsec_aso_out *out) +{ + struct mlx5e_macsec_aso *aso; + struct mlx5_aso_wqe *aso_wqe; + struct mlx5_aso *maso; + unsigned long expires; + int err; + + aso = &macsec->aso; + maso = aso->maso; + + mutex_lock(&aso->aso_lock); + + aso_wqe = mlx5_aso_get_wqe(maso); + mlx5_aso_build_wqe(maso, MLX5_MACSEC_ASO_DS_CNT, aso_wqe, in->obj_id, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC); + macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL); + + mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(maso, false); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); + + if (err) + goto err_out; + + if (MLX5_GET(macsec_aso, aso->umr->ctx, epn_event_arm)) + out->event_arm |= MLX5E_ASO_EPN_ARM; + + out->mode_param = MLX5_GET(macsec_aso, aso->umr->ctx, mode_parameter); + +err_out: + mutex_unlock(&aso->aso_lock); + return err; +} + +static struct mlx5e_macsec_sa *get_macsec_tx_sa_from_obj_id(const struct mlx5e_macsec *macsec, + const u32 obj_id) +{ + const struct list_head *device_list; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec_device *iter; + int i; + + device_list = &macsec->macsec_device_list_head; + + list_for_each_entry(iter, device_list, macsec_device_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + macsec_sa = iter->tx_sa[i]; + if (!macsec_sa || !macsec_sa->active) + continue; + if (macsec_sa->macsec_obj_id == obj_id) + return macsec_sa; + } + } + + return NULL; +} + +static struct mlx5e_macsec_sa *get_macsec_rx_sa_from_obj_id(const struct mlx5e_macsec *macsec, + const u32 obj_id) +{ + const struct list_head *device_list, *sc_list; + struct mlx5e_macsec_rx_sc *mlx5e_rx_sc; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec_device *iter; + int i; + + device_list = &macsec->macsec_device_list_head; + + list_for_each_entry(iter, device_list, macsec_device_list_element) { + sc_list = &iter->macsec_rx_sc_list_head; + list_for_each_entry(mlx5e_rx_sc, sc_list, rx_sc_list_element) { + for (i = 0; i < MACSEC_NUM_AN; ++i) { + macsec_sa = mlx5e_rx_sc->rx_sa[i]; + if (!macsec_sa || !macsec_sa->active) + continue; + if (macsec_sa->macsec_obj_id == obj_id) + return macsec_sa; + } + } + } + + return NULL; +} + +static void macsec_epn_update(struct mlx5e_macsec *macsec, struct mlx5_core_dev *mdev, + struct mlx5e_macsec_sa *sa, u32 obj_id, u32 mode_param) +{ + struct mlx5_macsec_obj_attrs attrs = {}; + struct mlx5e_macsec_aso_in in = {}; + + /* When the bottom of the replay protection window (mode_param) crosses 2^31 (half sequence + * number wraparound) hence mode_param > MLX5_MACSEC_EPN_SCOPE_MID the SW should update the + * esn_overlap to OLD (1). + * When the bottom of the replay protection window (mode_param) crosses 2^32 (full sequence + * number wraparound) hence mode_param < MLX5_MACSEC_EPN_SCOPE_MID since it did a + * wraparound, the SW should update the esn_overlap to NEW (0), and increment the esn_msb. + */ + + if (mode_param < MLX5_MACSEC_EPN_SCOPE_MID) { + sa->epn_state.epn_msb++; + sa->epn_state.overlap = 0; + } else { + sa->epn_state.overlap = 1; + } + + macsec_build_accel_attrs(sa, &attrs); + mlx5e_macsec_modify_obj(mdev, &attrs, obj_id); + + /* Re-set EPN arm event */ + in.obj_id = obj_id; + in.mode = MLX5_MACSEC_EPN; + macsec_aso_set_arm_event(mdev, macsec, &in); +} + +static void macsec_async_event(struct work_struct *work) +{ + struct mlx5e_macsec_async_work *async_work; + struct mlx5e_macsec_aso_out out = {}; + struct mlx5e_macsec_aso_in in = {}; + struct mlx5e_macsec_sa *macsec_sa; + struct mlx5e_macsec *macsec; + struct mlx5_core_dev *mdev; + u32 obj_id; + + async_work = container_of(work, struct mlx5e_macsec_async_work, work); + macsec = async_work->macsec; + mutex_lock(&macsec->lock); + + mdev = async_work->mdev; + obj_id = async_work->obj_id; + macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id); + if (!macsec_sa) { + macsec_sa = get_macsec_rx_sa_from_obj_id(macsec, obj_id); + if (!macsec_sa) { + mlx5_core_dbg(mdev, "MACsec SA is not found (SA object id %d)\n", obj_id); + goto out_async_work; + } + } + + /* Query MACsec ASO context */ + in.obj_id = obj_id; + macsec_aso_query(mdev, macsec, &in, &out); + + /* EPN case */ + if (macsec_sa->epn_state.epn_enabled && !(out.event_arm & MLX5E_ASO_EPN_ARM)) + macsec_epn_update(macsec, mdev, macsec_sa, obj_id, out.mode_param); + +out_async_work: + kfree(async_work); + mutex_unlock(&macsec->lock); +} + +static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_macsec *macsec = container_of(nb, struct mlx5e_macsec, nb); + struct mlx5e_macsec_async_work *async_work; + struct mlx5_eqe_obj_change *obj_change; + struct mlx5_eqe *eqe = data; + u16 obj_type; + u32 obj_id; + + if (event != MLX5_EVENT_TYPE_OBJECT_CHANGE) + return NOTIFY_DONE; + + obj_change = &eqe->data.obj_change; + obj_type = be16_to_cpu(obj_change->obj_type); + obj_id = be32_to_cpu(obj_change->obj_id); + + if (obj_type != MLX5_GENERAL_OBJECT_TYPES_MACSEC) + return NOTIFY_DONE; + + async_work = kzalloc(sizeof(*async_work), GFP_ATOMIC); + if (!async_work) + return NOTIFY_DONE; + + async_work->macsec = macsec; + async_work->mdev = macsec->mdev; + async_work->obj_id = obj_id; + + INIT_WORK(&async_work->work, macsec_async_event); + + WARN_ON(!queue_work(macsec->wq, &async_work->work)); + + return NOTIFY_OK; +} + +static int mlx5e_macsec_aso_init(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev) +{ + struct mlx5_aso *maso; + int err; + + err = mlx5_core_alloc_pd(mdev, &aso->pdn); + if (err) { + mlx5_core_err(mdev, + "MACsec offload: Failed to alloc pd for MACsec ASO, err=%d\n", + err); + return err; + } + + maso = mlx5_aso_create(mdev, aso->pdn); + if (IS_ERR(maso)) { + err = PTR_ERR(maso); + goto err_aso; + } + + err = mlx5e_macsec_aso_reg_mr(mdev, aso); + if (err) + goto err_aso_reg; + + mutex_init(&aso->aso_lock); + + aso->maso = maso; + + return 0; + +err_aso_reg: + mlx5_aso_destroy(maso); +err_aso: + mlx5_core_dealloc_pd(mdev, aso->pdn); + return err; +} + +static void mlx5e_macsec_aso_cleanup(struct mlx5e_macsec_aso *aso, struct mlx5_core_dev *mdev) +{ + if (!aso) + return; + + mlx5e_macsec_aso_dereg_mr(mdev, aso); + + mlx5_aso_destroy(aso->maso); + + mlx5_core_dealloc_pd(mdev, aso->pdn); +} + +static const struct macsec_ops macsec_offload_ops = { + .mdo_add_txsa = mlx5e_macsec_add_txsa, + .mdo_upd_txsa = mlx5e_macsec_upd_txsa, + .mdo_del_txsa = mlx5e_macsec_del_txsa, + .mdo_add_rxsc = mlx5e_macsec_add_rxsc, + .mdo_upd_rxsc = mlx5e_macsec_upd_rxsc, + .mdo_del_rxsc = mlx5e_macsec_del_rxsc, + .mdo_add_rxsa = mlx5e_macsec_add_rxsa, + .mdo_upd_rxsa = mlx5e_macsec_upd_rxsa, + .mdo_del_rxsa = mlx5e_macsec_del_rxsa, + .mdo_add_secy = mlx5e_macsec_add_secy, + .mdo_upd_secy = mlx5e_macsec_upd_secy, + .mdo_del_secy = mlx5e_macsec_del_secy, +}; + +bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + u32 fs_id; + + fs_id = mlx5_macsec_fs_get_fs_id_from_hashtable(macsec->mdev->macsec_fs, + &md_dst->u.macsec_info.sci); + if (!fs_id) + goto err_out; + + return true; + +err_out: + dev_kfree_skb_any(skb); + return false; +} + +void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + u32 fs_id; + + fs_id = mlx5_macsec_fs_get_fs_id_from_hashtable(macsec->mdev->macsec_fs, + &md_dst->u.macsec_info.sci); + if (!fs_id) + return; + + eseg->flow_table_metadata = cpu_to_be32(MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2); +} + +void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{ + struct mlx5e_macsec_rx_sc_xarray_element *sc_xarray_element; + u32 macsec_meta_data = be32_to_cpu(cqe->ft_metadata); + struct mlx5e_priv *priv = macsec_netdev_priv(netdev); + struct mlx5e_macsec_rx_sc *rx_sc; + struct mlx5e_macsec *macsec; + u32 fs_id; + + macsec = priv->macsec; + if (!macsec) + return; + + fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data); + + rcu_read_lock(); + sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id); + rx_sc = sc_xarray_element->rx_sc; + if (rx_sc) { + dst_hold(&rx_sc->md_dst->dst); + skb_dst_set(skb, &rx_sc->md_dst->dst); + } + + rcu_read_unlock(); +} + +void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return; + + /* Enable MACsec */ + mlx5_core_dbg(priv->mdev, "mlx5e: MACsec acceleration enabled\n"); + netdev->macsec_ops = &macsec_offload_ops; + netdev->features |= NETIF_F_HW_MACSEC; + netif_keep_dst(netdev); +} + +int mlx5e_macsec_init(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_macsec *macsec = NULL; + struct mlx5_macsec_fs *macsec_fs; + int err; + + if (!mlx5e_is_macsec_device(priv->mdev)) { + mlx5_core_dbg(mdev, "Not a MACsec offload device\n"); + return 0; + } + + macsec = kzalloc(sizeof(*macsec), GFP_KERNEL); + if (!macsec) + return -ENOMEM; + + INIT_LIST_HEAD(&macsec->macsec_device_list_head); + mutex_init(&macsec->lock); + + err = mlx5e_macsec_aso_init(&macsec->aso, priv->mdev); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init aso, err=%d\n", err); + goto err_aso; + } + + macsec->wq = alloc_ordered_workqueue("mlx5e_macsec_%s", 0, priv->netdev->name); + if (!macsec->wq) { + err = -ENOMEM; + goto err_wq; + } + + xa_init_flags(&macsec->sc_xarray, XA_FLAGS_ALLOC1); + + priv->macsec = macsec; + + macsec->mdev = mdev; + + macsec_fs = mlx5_macsec_fs_init(mdev); + if (!macsec_fs) { + err = -ENOMEM; + goto err_out; + } + + mdev->macsec_fs = macsec_fs; + + macsec->nb.notifier_call = macsec_obj_change_event; + mlx5_notifier_register(mdev, &macsec->nb); + + mlx5_core_dbg(mdev, "MACsec attached to netdevice\n"); + + return 0; + +err_out: + destroy_workqueue(macsec->wq); +err_wq: + mlx5e_macsec_aso_cleanup(&macsec->aso, priv->mdev); +err_aso: + kfree(macsec); + priv->macsec = NULL; + return err; +} + +void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_macsec *macsec = priv->macsec; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!macsec) + return; + + mlx5_notifier_unregister(mdev, &macsec->nb); + mlx5_macsec_fs_cleanup(mdev->macsec_fs); + destroy_workqueue(macsec->wq); + mlx5e_macsec_aso_cleanup(&macsec->aso, mdev); + mutex_destroy(&macsec->lock); + kfree(macsec); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h new file mode 100644 index 0000000000..27df72e231 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_ACCEL_MACSEC_H__ +#define __MLX5_EN_ACCEL_MACSEC_H__ + +#ifdef CONFIG_MLX5_MACSEC + +#include +#include +#include +#include "lib/macsec_fs.h" + +struct mlx5e_priv; +struct mlx5e_macsec; + +void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv); +int mlx5e_macsec_init(struct mlx5e_priv *priv); +void mlx5e_macsec_cleanup(struct mlx5e_priv *priv); +bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb); +void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec, + struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg); + +static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + + return md_dst && (md_dst->type == METADATA_MACSEC); +} + +static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) +{ + return MLX5_MACSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); +} + +void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, + struct mlx5_cqe64 *cqe); + +#else + +static inline void mlx5e_macsec_build_netdev(struct mlx5e_priv *priv) {} +static inline int mlx5e_macsec_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_macsec_cleanup(struct mlx5e_priv *priv) {} +static inline bool mlx5e_macsec_skb_is_offload(struct sk_buff *skb) { return false; } +static inline bool mlx5e_macsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; } +static inline void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{} +#endif /* CONFIG_MLX5_MACSEC */ + +#endif /* __MLX5_ACCEL_EN_MACSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c new file mode 100644 index 0000000000..4559ee16a1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_stats.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include + +#include "en.h" +#include "en_accel/macsec.h" + +static const struct counter_desc mlx5e_macsec_hw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_rx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_rx_pkts_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_rx_bytes_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_tx_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_tx_pkts_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5_macsec_stats, macsec_tx_bytes_drop) }, +}; + +#define NUM_MACSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_macsec_hw_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(macsec_hw) +{ + if (!priv->macsec) + return 0; + + if (mlx5e_is_macsec_device(priv->mdev)) + return NUM_MACSEC_HW_COUNTERS; + + return 0; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(macsec_hw) {} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(macsec_hw) +{ + unsigned int i; + + if (!priv->macsec) + return idx; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return idx; + + for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + mlx5e_macsec_hw_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(macsec_hw) +{ + struct mlx5_macsec_fs *macsec_fs; + int i; + + if (!priv->macsec) + return idx; + + if (!mlx5e_is_macsec_device(priv->mdev)) + return idx; + + macsec_fs = priv->mdev->macsec_fs; + mlx5_macsec_fs_get_stats_fill(macsec_fs, mlx5_macsec_fs_get_stats(macsec_fs)); + for (i = 0; i < NUM_MACSEC_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_macsec_fs_get_stats(macsec_fs), + mlx5e_macsec_hw_stats_desc, + i); + + return idx; +} + +MLX5E_DEFINE_STATS_GRP(macsec_hw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c new file mode 100644 index 0000000000..e66f486faa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -0,0 +1,776 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "en.h" + +#define ARFS_HASH_SHIFT BITS_PER_BYTE +#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) + +struct arfs_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_handle *default_rule; + struct hlist_head rules_hash[ARFS_HASH_SIZE]; +}; + +enum arfs_type { + ARFS_IPV4_TCP, + ARFS_IPV6_TCP, + ARFS_IPV4_UDP, + ARFS_IPV6_UDP, + ARFS_NUM_TYPES, +}; + +struct mlx5e_arfs_tables { + struct arfs_table arfs_tables[ARFS_NUM_TYPES]; + /* Protect aRFS rules list */ + spinlock_t arfs_lock; + int last_filter_id; + struct workqueue_struct *wq; +}; + +struct arfs_tuple { + __be16 etype; + u8 ip_proto; + union { + __be32 src_ipv4; + struct in6_addr src_ipv6; + }; + union { + __be32 dst_ipv4; + struct in6_addr dst_ipv6; + }; + __be16 src_port; + __be16 dst_port; +}; + +struct arfs_rule { + struct mlx5e_priv *priv; + struct work_struct arfs_work; + struct mlx5_flow_handle *rule; + struct hlist_node hlist; + int rxq; + /* Flow ID passed to ndo_rx_flow_steer */ + int flow_id; + /* Filter ID returned by ndo_rx_flow_steer */ + int filter_id; + struct arfs_tuple tuple; +}; + +#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \ + for (i = 0; i < ARFS_NUM_TYPES; i++) \ + mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j) + +#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \ + for (j = 0; j < ARFS_HASH_SIZE; j++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) + +static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type) +{ + switch (type) { + case ARFS_IPV4_TCP: + return MLX5_TT_IPV4_TCP; + case ARFS_IPV4_UDP: + return MLX5_TT_IPV4_UDP; + case ARFS_IPV6_TCP: + return MLX5_TT_IPV6_TCP; + case ARFS_IPV6_UDP: + return MLX5_TT_IPV6_UDP; + default: + return -EINVAL; + } +} + +static int arfs_disable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + int err, i; + + for (i = 0; i < ARFS_NUM_TYPES; i++) { + /* Modify ttc rules destination back to their default */ + err = mlx5_ttc_fwd_default_dest(ttc, arfs_get_tt(i)); + if (err) { + fs_err(fs, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, arfs_get_tt(i), err); + return err; + } + } + return 0; +} + +static void arfs_del_rules(struct mlx5e_flow_steering *fs); + +int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs) +{ + /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile + * cleanup_rx callback and it is not recreated when + * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering() + * is not called by the uplink_rep profile init_rx callback. Thus, if + * ntuple is set, moving to switchdev flow will enter this function + * with fs->arfs nullified. + */ + if (!mlx5e_fs_get_arfs(fs)) + return 0; + + arfs_del_rules(fs); + + return arfs_disable(fs); +} + +int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false); + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct mlx5_flow_destination dest = {}; + int err, i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.ft = arfs->arfs_tables[i].ft.t; + /* Modify ttc rules destination to point on the aRFS FTs */ + err = mlx5_ttc_fwd_dest(ttc, arfs_get_tt(i), &dest); + if (err) { + fs_err(fs, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n", + __func__, arfs_get_tt(i), err); + arfs_disable(fs); + return err; + } + } + return 0; +} + +static void arfs_destroy_table(struct arfs_table *arfs_t) +{ + mlx5_del_flow_rules(arfs_t->default_rule); + mlx5e_destroy_flow_table(&arfs_t->ft); +} + +static void _mlx5e_cleanup_tables(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + int i; + + arfs_del_rules(fs); + destroy_workqueue(arfs->wq); + for (i = 0; i < ARFS_NUM_TYPES; i++) { + if (!IS_ERR_OR_NULL(arfs->arfs_tables[i].ft.t)) + arfs_destroy_table(&arfs->arfs_tables[i]); + } +} + +void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + + if (!ntuple) + return; + + _mlx5e_cleanup_tables(fs); + mlx5e_fs_set_arfs(fs, NULL); + kvfree(arfs); +} + +static int arfs_add_default_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + enum arfs_type type) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct arfs_table *arfs_t = &arfs->arfs_tables[type]; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + enum mlx5_traffic_types tt; + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + tt = arfs_get_tt(type); + if (tt == -EINVAL) { + fs_err(fs, "%s: bad arfs_type: %d\n", __func__, type); + return -EINVAL; + } + + /* FIXME: Must use mlx5_ttc_get_default_dest(), + * but can't since TTC default is not setup yet ! + */ + dest.tir_num = mlx5e_rx_res_get_tirn_rss(rx_res, tt); + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, + &flow_act, + &dest, 1); + if (IS_ERR(arfs_t->default_rule)) { + err = PTR_ERR(arfs_t->default_rule); + arfs_t->default_rule = NULL; + fs_err(fs, "%s: add rule failed, arfs type=%d\n", __func__, type); + } + + return err; +} + +#define MLX5E_ARFS_NUM_GROUPS 2 +#define MLX5E_ARFS_GROUP1_SIZE (BIT(16) - 1) +#define MLX5E_ARFS_GROUP2_SIZE BIT(0) +#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\ + MLX5E_ARFS_GROUP2_SIZE) +static int arfs_create_groups(struct mlx5e_flow_table *ft, + enum arfs_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free_g; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV6_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport); + break; + case ARFS_IPV4_UDP: + case ARFS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport); + break; + default: + err = -EINVAL; + goto err_free_in; + } + + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV4_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + break; + case ARFS_IPV6_TCP: + case ARFS_IPV6_UDP: + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + break; + default: + err = -EINVAL; + goto err_free_in; + } + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_clean_group; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_clean_group; + ft->num_groups++; + + kvfree(in); + return 0; + +err_clean_group: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +err_free_in: + kvfree(in); +err_free_g: + kfree(ft->g); + ft->g = NULL; + return err; +} + +static int arfs_create_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + enum arfs_type type) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false); + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE; + ft_attr.level = MLX5E_ARFS_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = arfs_create_groups(ft, type); + if (err) + goto err; + + err = arfs_add_default_rule(fs, rx_res, type); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, bool ntuple) +{ + struct mlx5e_arfs_tables *arfs; + int err = -ENOMEM; + int i; + + if (!ntuple) + return 0; + + arfs = kvzalloc(sizeof(*arfs), GFP_KERNEL); + if (!arfs) + return -ENOMEM; + + spin_lock_init(&arfs->arfs_lock); + arfs->wq = create_singlethread_workqueue("mlx5e_arfs"); + if (!arfs->wq) + goto err; + + mlx5e_fs_set_arfs(fs, arfs); + + for (i = 0; i < ARFS_NUM_TYPES; i++) { + err = arfs_create_table(fs, rx_res, i); + if (err) + goto err_des; + } + return 0; + +err_des: + _mlx5e_cleanup_tables(fs); +err: + mlx5e_fs_set_arfs(fs, NULL); + kvfree(arfs); + return err; +} + +#define MLX5E_ARFS_EXPIRY_QUOTA 60 + +static void arfs_may_expire_flow(struct mlx5e_priv *priv) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); + struct arfs_rule *arfs_rule; + struct hlist_node *htmp; + HLIST_HEAD(del_list); + int quota = 0; + int i; + int j; + + spin_lock_bh(&arfs->arfs_lock); + mlx5e_for_each_arfs_rule(arfs_rule, htmp, arfs->arfs_tables, i, j) { + if (!work_pending(&arfs_rule->arfs_work) && + rps_may_expire_flow(priv->netdev, + arfs_rule->rxq, arfs_rule->flow_id, + arfs_rule->filter_id)) { + hlist_del_init(&arfs_rule->hlist); + hlist_add_head(&arfs_rule->hlist, &del_list); + if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) + break; + } + } + spin_unlock_bh(&arfs->arfs_lock); + hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { + if (arfs_rule->rule) { + mlx5_del_flow_rules(arfs_rule->rule); + priv->channel_stats[arfs_rule->rxq]->rq.arfs_expired++; + } + hlist_del(&arfs_rule->hlist); + kfree(arfs_rule); + } +} + +static void arfs_del_rules(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(fs); + struct hlist_node *htmp; + struct arfs_rule *rule; + HLIST_HEAD(del_list); + int i; + int j; + + spin_lock_bh(&arfs->arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) { + hlist_del_init(&rule->hlist); + hlist_add_head(&rule->hlist, &del_list); + } + spin_unlock_bh(&arfs->arfs_lock); + + hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { + cancel_work_sync(&rule->arfs_work); + if (rule->rule) + mlx5_del_flow_rules(rule->rule); + hlist_del(&rule->hlist); + kfree(rule); + } +} + +static struct hlist_head * +arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, + __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + + bucket_idx = hash_long(l, ARFS_HASH_SHIFT); + + return &arfs_t->rules_hash[bucket_idx]; +} + +static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, + u8 ip_proto, __be16 etype) +{ + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV4_TCP]; + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV4_UDP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV6_TCP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV6_UDP]; + + return NULL; +} + +static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, + struct arfs_rule *arfs_rule) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); + struct arfs_tuple *tuple = &arfs_rule->tuple; + struct mlx5_flow_handle *rule = NULL; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct arfs_table *arfs_table; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; + err = -ENOMEM; + goto out; + } + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, + ntohs(tuple->etype)); + arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); + if (!arfs_table) { + WARN_ONCE(1, "arfs table does not exist for etype %u and ip_proto %u\n", + tuple->etype, tuple->ip_proto); + err = -EINVAL; + goto out; + } + + ft = arfs_table->ft.t; + if (tuple->ip_proto == IPPROTO_TCP) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport, + ntohs(tuple->src_port)); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.udp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.udp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport, + ntohs(tuple->src_port)); + } + if (tuple->etype == htons(ETH_P_IP)) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &tuple->src_ipv4, + 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &tuple->dst_ipv4, + 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &tuple->src_ipv6, + 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &tuple->dst_ipv6, + 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + } + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; + netdev_dbg(priv->netdev, + "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, + tuple->ip_proto, err); + } + +out: + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static void arfs_modify_rule_rq(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, u16 rxq) +{ + struct mlx5_flow_destination dst = {}; + int err = 0; + + dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq); + err = mlx5_modify_rule_destination(rule, &dst, NULL); + if (err) { + priv->channel_stats[rxq]->rq.arfs_err++; + netdev_warn(priv->netdev, + "Failed to modify aRFS rule destination to rq=%d\n", rxq); + } +} + +static void arfs_handle_work(struct work_struct *work) +{ + struct arfs_rule *arfs_rule = container_of(work, + struct arfs_rule, + arfs_work); + struct mlx5e_priv *priv = arfs_rule->priv; + struct mlx5e_arfs_tables *arfs; + struct mlx5_flow_handle *rule; + + arfs = mlx5e_fs_get_arfs(priv->fs); + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + spin_lock_bh(&arfs->arfs_lock); + hlist_del(&arfs_rule->hlist); + spin_unlock_bh(&arfs->arfs_lock); + + mutex_unlock(&priv->state_lock); + kfree(arfs_rule); + goto out; + } + mutex_unlock(&priv->state_lock); + + if (!arfs_rule->rule) { + rule = arfs_add_rule(priv, arfs_rule); + if (IS_ERR(rule)) + goto out; + arfs_rule->rule = rule; + priv->channel_stats[arfs_rule->rxq]->rq.arfs_add++; + } else { + arfs_modify_rule_rq(priv, arfs_rule->rule, + arfs_rule->rxq); + } +out: + arfs_may_expire_flow(priv); +} + +static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, + struct arfs_table *arfs_t, + const struct flow_keys *fk, + u16 rxq, u32 flow_id) +{ + struct mlx5e_arfs_tables *arfs = mlx5e_fs_get_arfs(priv->fs); + struct arfs_rule *rule; + struct arfs_tuple *tuple; + + rule = kzalloc(sizeof(*rule), GFP_ATOMIC); + if (!rule) { + priv->channel_stats[rxq]->rq.arfs_err++; + return NULL; + } + + rule->priv = priv; + rule->rxq = rxq; + INIT_WORK(&rule->arfs_work, arfs_handle_work); + + tuple = &rule->tuple; + tuple->etype = fk->basic.n_proto; + tuple->ip_proto = fk->basic.ip_proto; + if (tuple->etype == htons(ETH_P_IP)) { + tuple->src_ipv4 = fk->addrs.v4addrs.src; + tuple->dst_ipv4 = fk->addrs.v4addrs.dst; + } else { + memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)); + memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); + } + tuple->src_port = fk->ports.src; + tuple->dst_port = fk->ports.dst; + + rule->flow_id = flow_id; + rule->filter_id = arfs->last_filter_id++ % RPS_NO_FILTER; + + hlist_add_head(&rule->hlist, + arfs_hash_bucket(arfs_t, tuple->src_port, + tuple->dst_port)); + return rule; +} + +static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk) +{ + if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst) + return false; + if (tuple->etype != fk->basic.n_proto) + return false; + if (tuple->etype == htons(ETH_P_IP)) + return tuple->src_ipv4 == fk->addrs.v4addrs.src && + tuple->dst_ipv4 == fk->addrs.v4addrs.dst; + if (tuple->etype == htons(ETH_P_IPV6)) + return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); + return false; +} + +static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, + const struct flow_keys *fk) +{ + struct arfs_rule *arfs_rule; + struct hlist_head *head; + + head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst); + hlist_for_each_entry(arfs_rule, head, hlist) { + if (arfs_cmp(&arfs_rule->tuple, fk)) + return arfs_rule; + } + + return NULL; +} + +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_arfs_tables *arfs; + struct arfs_rule *arfs_rule; + struct arfs_table *arfs_t; + struct flow_keys fk; + + arfs = mlx5e_fs_get_arfs(priv->fs); + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return -EPROTONOSUPPORT; + + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) + return -EPROTONOSUPPORT; + + if (skb->encapsulation) + return -EPROTONOSUPPORT; + + arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto); + if (!arfs_t) + return -EPROTONOSUPPORT; + + spin_lock_bh(&arfs->arfs_lock); + arfs_rule = arfs_find_rule(arfs_t, &fk); + if (arfs_rule) { + if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) { + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; + } + + priv->channel_stats[rxq_index]->rq.arfs_request_in++; + priv->channel_stats[arfs_rule->rxq]->rq.arfs_request_out++; + arfs_rule->rxq = rxq_index; + } else { + arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id); + if (!arfs_rule) { + spin_unlock_bh(&arfs->arfs_lock); + return -ENOMEM; + } + } + queue_work(arfs->wq, &arfs_rule->arfs_work); + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 0000000000..41c396e764 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" +#include "lib/crypto.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices created on the same nic. + */ + +void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) +{ + bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write); + bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read) || + (pcie_relaxed_ordering_enabled(mdev->pdev) && + MLX5_CAP_GEN(mdev, relaxed_ordering_read_pci_enabled)); + + MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_read); + MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_write); +} + +int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + mlx5e_mkey_set_relaxed_ordering(mdev, mkc); + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; + int err; + + err = mlx5_core_alloc_pd(mdev, &res->pdn); + if (err) { + mlx5_core_err(mdev, "alloc pd failed, %d\n", err); + return err; + } + + err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); + if (err) { + mlx5_core_err(mdev, "alloc td failed, %d\n", err); + goto err_dealloc_pd; + } + + err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey); + if (err) { + mlx5_core_err(mdev, "create mkey failed, %d\n", err); + goto err_dealloc_transport_domain; + } + + err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false); + if (err) { + mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err); + goto err_destroy_mkey; + } + + INIT_LIST_HEAD(&res->td.tirs_list); + mutex_init(&res->td.list_lock); + + mdev->mlx5e_res.dek_priv = mlx5_crypto_dek_init(mdev); + if (IS_ERR(mdev->mlx5e_res.dek_priv)) { + mlx5_core_err(mdev, "crypto dek init failed, %ld\n", + PTR_ERR(mdev->mlx5e_res.dek_priv)); + mdev->mlx5e_res.dek_priv = NULL; + } + + return 0; + +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, res->mkey); +err_dealloc_transport_domain: + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, res->pdn); + return err; +} + +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; + + mlx5_crypto_dek_cleanup(mdev->mlx5e_res.dek_priv); + mdev->mlx5e_res.dek_priv = NULL; + mlx5_free_bfreg(mdev, &res->bfreg); + mlx5_core_destroy_mkey(mdev, res->mkey); + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); + mlx5_core_dealloc_pd(mdev, res->pdn); + memset(res, 0, sizeof(*res)); +} + +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, + bool enable_mc_lb) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_tir *tir; + u8 lb_flags = 0; + int err = 0; + u32 tirn = 0; + int inlen; + void *in; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + if (enable_uc_lb) + lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + + if (enable_mc_lb) + lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; + + if (lb_flags) + MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags); + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock); + list_for_each_entry(tir, &mdev->mlx5e_res.hw_objs.td.tirs_list, list) { + tirn = tir->tirn; + err = mlx5_core_modify_tir(mdev, tirn, in); + if (err) + break; + } + mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); + + kvfree(in); + if (err) + netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c new file mode 100644 index 0000000000..8705cffc74 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -0,0 +1,1257 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include "en.h" +#include "en/port.h" +#include "en/port_buffer.h" + +#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ + +#define MLX5E_100MB (100000) +#define MLX5E_1GB (1000000) + +#define MLX5E_CEE_STATE_UP 1 +#define MLX5E_CEE_STATE_DOWN 0 + +/* Max supported cable length is 1000 meters */ +#define MLX5E_MAX_CABLE_LENGTH 1000 + +enum { + MLX5E_VENDOR_TC_GROUP_NUM = 7, + MLX5E_LOWEST_PRIO_GROUP = 0, +}; + +enum { + MLX5_DCB_CHG_RESET, + MLX5_DCB_NO_CHG, + MLX5_DCB_CHG_NO_RESET, +}; + +#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \ + MLX5_CAP_QCAM_REG(mdev, qpts) && \ + MLX5_CAP_QCAM_REG(mdev, qpdpm)) + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state); +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio); + +/* If dcbx mode is non-host set the dcbx mode to host. + */ +static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv, + enum mlx5_dcbx_oper_mode mode) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 param[MLX5_ST_SZ_DW(dcbx_param)]; + int err; + + err = mlx5_query_port_dcbx_param(mdev, param); + if (err) + return err; + + MLX5_SET(dcbx_param, param, version_admin, mode); + if (mode != MLX5E_DCBX_PARAM_VER_OPER_HOST) + MLX5_SET(dcbx_param, param, willing_admin, 1); + + return mlx5_set_port_dcbx_param(mdev, param); +} + +static int mlx5e_dcbnl_switch_to_host_mode(struct mlx5e_priv *priv) +{ + struct mlx5e_dcbx *dcbx = &priv->dcbx; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, dcbx)) + return 0; + + if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) + return 0; + + err = mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST); + if (err) + return err; + + dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST; + return 0; +} + +static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + bool is_tc_group_6_exist = false; + bool is_zero_bw_ets_tc = false; + int err = 0; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return -EOPNOTSUPP; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]); + if (err) + return err; + } + + ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < ets->ets_cap; i++) { + err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]); + if (err) + return err; + + err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]); + if (err) + return err; + + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC && + tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1)) + is_zero_bw_ets_tc = true; + + if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1)) + is_tc_group_6_exist = true; + } + + /* Report 0% ets tc if exits*/ + if (is_zero_bw_ets_tc) { + for (i = 0; i < ets->ets_cap; i++) + if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP) + ets->tc_tx_bw[i] = 0; + } + + /* Update tc_tsa based on fw setting*/ + for (i = 0; i < ets->ets_cap; i++) { + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM && + !is_tc_group_6_exist) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; + } + memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa)); + + return err; +} + +static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) +{ + bool any_tc_mapped_to_ets = false; + bool ets_zero_bw = false; + int strict_group; + int i; + + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + any_tc_mapped_to_ets = true; + if (!ets->tc_tx_bw[i]) + ets_zero_bw = true; + } + } + + /* strict group has higher priority than ets group */ + strict_group = MLX5E_LOWEST_PRIO_GROUP; + if (any_tc_mapped_to_ets) + strict_group++; + if (ets_zero_bw) + strict_group++; + + for (i = 0; i <= max_tc; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + tc_group[i] = MLX5E_VENDOR_TC_GROUP_NUM; + break; + case IEEE_8021QAZ_TSA_STRICT: + tc_group[i] = strict_group++; + break; + case IEEE_8021QAZ_TSA_ETS: + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP; + if (ets->tc_tx_bw[i] && ets_zero_bw) + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1; + break; + } + } +} + +static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, + u8 *tc_group, int max_tc) +{ + int bw_for_ets_zero_bw_tc = 0; + int last_ets_zero_bw_tc = -1; + int num_ets_zero_bw = 0; + int i; + + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS && + !ets->tc_tx_bw[i]) { + num_ets_zero_bw++; + last_ets_zero_bw_tc = i; + } + } + + if (num_ets_zero_bw) + bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw; + + for (i = 0; i <= max_tc; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_VENDOR: + tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + break; + case IEEE_8021QAZ_TSA_STRICT: + tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + break; + case IEEE_8021QAZ_TSA_ETS: + tc_tx_bw[i] = ets->tc_tx_bw[i] ? + ets->tc_tx_bw[i] : + bw_for_ets_zero_bw_tc; + break; + } + } + + /* Make sure the total bw for ets zero bw group is 100% */ + if (last_ets_zero_bw_tc != -1) + tc_tx_bw[last_ets_zero_bw_tc] += + MLX5E_MAX_BW_ALLOC % num_ets_zero_bw; +} + +/* If there are ETS BW 0, + * Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%. + * Set group #0 to all the ETS BW 0 tcs and + * equally splits the 100% BW between them + * Report both group #0 and #1 as ETS type. + * All the tcs in group #0 will be reported with 0% BW. + */ +static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + int max_tc = mlx5_max_tc(mdev); + int err, i; + + mlx5e_build_tc_group(ets, tc_group, max_tc); + mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc); + + err = mlx5_set_port_prio_tc(mdev, ets->prio_tc); + if (err) + return err; + + err = mlx5_set_port_tc_group(mdev, tc_group); + if (err) + return err; + + err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw); + + if (err) + return err; + + memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + netdev_dbg(priv->netdev, "%s: prio_%d <=> tc_%d\n", + __func__, i, ets->prio_tc[i]); + netdev_dbg(priv->netdev, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", + __func__, i, tc_tx_bw[i], tc_group[i]); + } + + return err; +} + +static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, + struct ieee_ets *ets, + bool zero_sum_allowed) +{ + bool have_ets_tc = false; + int bw_sum = 0; + int i; + + /* Validate Priority */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) { + netdev_err(netdev, + "Failed to validate ETS: priority value greater than max(%d)\n", + MLX5E_MAX_PRIORITY); + return -EINVAL; + } + } + + /* Validate Bandwidth Sum */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + have_ets_tc = true; + bw_sum += ets->tc_tx_bw[i]; + } + } + + if (have_ets_tc && bw_sum != 100) { + if (bw_sum || (!bw_sum && !zero_sum_allowed)) + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); + return -EINVAL; + } + return 0; +} + +static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return -EOPNOTSUPP; + + err = mlx5e_dbcnl_validate_ets(netdev, ets, false); + if (err) + return err; + + err = mlx5e_dcbnl_ieee_setets_core(priv, ets); + if (err) + return err; + + return 0; +} + +static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + int i; + + pfc->pfc_cap = mlx5_max_tc(mdev) + 1; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause); + pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause); + } + + if (MLX5_BUFFER_SUPPORTED(mdev)) + pfc->delay = priv->dcbx.cable_len; + + return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL); +} + +static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 old_cable_len = priv->dcbx.cable_len; + struct ieee_pfc pfc_new; + u32 changed = 0; + u8 curr_pfc_en; + int ret = 0; + + /* pfc_en */ + mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL); + if (pfc->pfc_en != curr_pfc_en) { + ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); + if (ret) + return ret; + mlx5_toggle_port_link(mdev); + changed |= MLX5E_PORT_BUFFER_PFC; + } + + if (pfc->delay && + pfc->delay < MLX5E_MAX_CABLE_LENGTH && + pfc->delay != priv->dcbx.cable_len) { + priv->dcbx.cable_len = pfc->delay; + changed |= MLX5E_PORT_BUFFER_CABLE_LEN; + } + + if (MLX5_BUFFER_SUPPORTED(mdev)) { + pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en; + if (priv->dcbx.manual_buffer) + ret = mlx5e_port_manual_buffer_config(priv, changed, + dev->mtu, &pfc_new, + NULL, NULL); + + if (ret && (changed & MLX5E_PORT_BUFFER_CABLE_LEN)) + priv->dcbx.cable_len = old_cable_len; + } + + if (!ret) { + netdev_dbg(dev, + "%s: PFC per priority bit mask: 0x%x\n", + __func__, pfc->pfc_en); + } + return ret; +} + +static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return priv->dcbx.cap; +} + +static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_dcbx *dcbx = &priv->dcbx; + + if (mode & DCB_CAP_DCBX_LLD_MANAGED) + return 1; + + if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) { + if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO) + return 0; + + /* set dcbx to fw controlled */ + if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) { + dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO; + dcbx->cap &= ~DCB_CAP_DCBX_HOST; + return 0; + } + + return 1; + } + + if (!(mode & DCB_CAP_DCBX_HOST)) + return 1; + + if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev))) + return 1; + + dcbx->cap = mode; + + return 0; +} + +static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct dcb_app temp; + bool is_new; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) || + !MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EOPNOTSUPP; + + if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) || + (app->protocol >= MLX5E_MAX_DSCP)) + return -EINVAL; + + /* Save the old entry info */ + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + temp.protocol = app->protocol; + temp.priority = priv->dcbx_dp.dscp2prio[app->protocol]; + + /* Check if need to switch to dscp trust state */ + if (!priv->dcbx.dscp_app_cnt) { + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP); + if (err) + return err; + } + + /* Skip the fw command if new and old mapping are the same */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) { + err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority); + if (err) + goto fw_err; + } + + /* Delete the old entry if exists */ + is_new = false; + err = dcb_ieee_delapp(dev, &temp); + if (err) + is_new = true; + + /* Add new entry and update counter */ + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + if (is_new) + priv->dcbx.dscp_app_cnt++; + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + +static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) || + !MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EOPNOTSUPP; + + if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) || + (app->protocol >= MLX5E_MAX_DSCP)) + return -EINVAL; + + /* Skip if no dscp app entry */ + if (!priv->dcbx.dscp_app_cnt) + return -ENOENT; + + /* Check if the entry matches fw setting */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) + return -ENOENT; + + /* Delete the app entry */ + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + /* Reset the priority mapping back to zero */ + err = mlx5e_set_dscp2prio(priv, app->protocol, 0); + if (err) + goto fw_err; + + priv->dcbx.dscp_app_cnt--; + + /* Check if need to switch to pcp trust state */ + if (!priv->dcbx.dscp_app_cnt) + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + +static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + int err; + int i; + + err = mlx5_query_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); + if (err) + return err; + + memset(maxrate->tc_maxrate, 0, sizeof(maxrate->tc_maxrate)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + switch (max_bw_unit[i]) { + case MLX5_100_MBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_100MB; + break; + case MLX5_GBPS_UNIT: + maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_1GB; + break; + case MLX5_BW_NO_LIMIT: + break; + default: + WARN(true, "non-supported BW unit"); + break; + } + } + + return 0; +} + +static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, + struct ieee_maxrate *maxrate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; + u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; + __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); + int i; + + memset(max_bw_value, 0, sizeof(max_bw_value)); + memset(max_bw_unit, 0, sizeof(max_bw_unit)); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + if (!maxrate->tc_maxrate[i]) { + max_bw_unit[i] = MLX5_BW_NO_LIMIT; + continue; + } + if (maxrate->tc_maxrate[i] < upper_limit_mbps) { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_100MB); + max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1; + max_bw_unit[i] = MLX5_100_MBPS_UNIT; + } else { + max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], + MLX5E_1GB); + max_bw_unit[i] = MLX5_GBPS_UNIT; + } + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + netdev_dbg(netdev, "%s: tc_%d <=> max_bw %d Gbps\n", + __func__, i, max_bw_value[i]); + } + + return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); +} + +static u8 mlx5e_dcbnl_setall(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + struct mlx5_core_dev *mdev = priv->mdev; + struct ieee_ets ets; + struct ieee_pfc pfc; + int err = -EOPNOTSUPP; + int i; + + if (!MLX5_CAP_GEN(mdev, ets)) + goto out; + + memset(&ets, 0, sizeof(ets)); + memset(&pfc, 0, sizeof(pfc)); + + ets.ets_cap = IEEE_8021QAZ_MAX_TCS; + for (i = 0; i < CEE_DCBX_MAX_PGS; i++) { + ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i]; + ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i]; + ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; + netdev_dbg(netdev, + "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", + __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], + ets.prio_tc[i]); + } + + err = mlx5e_dbcnl_validate_ets(netdev, &ets, true); + if (err) + goto out; + + err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); + if (err) { + netdev_err(netdev, + "%s, Failed to set ETS: %d\n", __func__, err); + goto out; + } + + /* Set PFC */ + pfc.pfc_cap = mlx5_max_tc(mdev) + 1; + if (!cee_cfg->pfc_enable) + pfc.pfc_en = 0; + else + for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) + pfc.pfc_en |= cee_cfg->pfc_setting[i] << i; + + err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc); + if (err) { + netdev_err(netdev, + "%s, Failed to set PFC: %d\n", __func__, err); + goto out; + } +out: + return err ? MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET; +} + +static u8 mlx5e_dcbnl_getstate(struct net_device *netdev) +{ + return MLX5E_CEE_STATE_UP; +} + +static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev, + u8 *perm_addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!perm_addr) + return; + + memset(perm_addr, 0xff, MAX_ADDR_LEN); + + mlx5_query_mac_address(priv->mdev, perm_addr); +} + +static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev, + int priority, u8 prio_type, + u8 pgid, u8 bw_pct, u8 up_map) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + cee_cfg->prio_to_pg_map[priority] = pgid; +} + +static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev, + int pgid, u8 bw_pct) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + cee_cfg->pg_bw_pct[pgid] = bw_pct; +} + +static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev, + int priority, u8 *prio_type, + u8 *pgid, u8 *bw_pct, u8 *up_map) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) { + netdev_err(netdev, "%s, ets is not supported\n", __func__); + return; + } + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + *prio_type = 0; + *bw_pct = 0; + *up_map = 0; + + if (mlx5_query_port_prio_tc(mdev, priority, pgid)) + *pgid = 0; +} + +static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, + int pgid, u8 *bw_pct) +{ + struct ieee_ets ets; + + if (pgid >= CEE_DCBX_MAX_PGS) { + netdev_err(netdev, + "%s, priority group is out of range\n", __func__); + return; + } + + mlx5e_dcbnl_ieee_getets(netdev, &ets); + *bw_pct = ets.tc_tx_bw[pgid]; +} + +static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev, + int priority, u8 setting) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (setting > 1) + return; + + cee_cfg->pfc_setting[priority] = setting; +} + +static int +mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev, + int priority, u8 *setting) +{ + struct ieee_pfc pfc; + int err; + + err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc); + + if (err) + *setting = 0; + else + *setting = (pfc.pfc_en >> priority) & 0x01; + + return err; +} + +static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev, + int priority, u8 *setting) +{ + if (priority >= CEE_DCBX_MAX_PRIO) { + netdev_err(netdev, + "%s, priority is out of range\n", __func__); + return; + } + + if (!setting) + return; + + mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting); +} + +static u8 mlx5e_dcbnl_getcap(struct net_device *netdev, + int capid, u8 *cap) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 rval = 0; + + switch (capid) { + case DCB_CAP_ATTR_PG: + *cap = true; + break; + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_UP2TC: + *cap = false; + break; + case DCB_CAP_ATTR_PG_TCS: + *cap = 1 << mlx5_max_tc(mdev); + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 1 << mlx5_max_tc(mdev); + break; + case DCB_CAP_ATTR_GSP: + *cap = false; + break; + case DCB_CAP_ATTR_BCN: + *cap = false; + break; + case DCB_CAP_ATTR_DCBX: + *cap = priv->dcbx.cap | + DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_VER_IEEE; + break; + default: + *cap = 0; + rval = 1; + break; + } + + return rval; +} + +static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev, + int tcs_id, u8 *num) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + switch (tcs_id) { + case DCB_NUMTCS_ATTR_PG: + case DCB_NUMTCS_ATTR_PFC: + *num = mlx5_max_tc(mdev) + 1; + break; + default: + return -EINVAL; + } + + return 0; +} + +static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct ieee_pfc pfc; + + if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc)) + return MLX5E_CEE_STATE_DOWN; + + return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN; +} + +static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg; + + if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN)) + return; + + cee_cfg->pfc_enable = state; +} + +static int mlx5e_dcbnl_getbuffer(struct net_device *dev, + struct dcbnl_buffer *dcb_buffer) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_port_buffer port_buffer; + u8 buffer[MLX5E_MAX_PRIORITY]; + int i, err; + + if (!MLX5_BUFFER_SUPPORTED(mdev)) + return -EOPNOTSUPP; + + err = mlx5e_port_query_priority2buffer(mdev, buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) + dcb_buffer->prio2buffer[i] = buffer[i]; + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) + dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size; + dcb_buffer->total_size = port_buffer.port_buffer_size - + port_buffer.internal_buffers_size; + + return 0; +} + +static int mlx5e_dcbnl_setbuffer(struct net_device *dev, + struct dcbnl_buffer *dcb_buffer) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_port_buffer port_buffer; + u8 old_prio2buffer[MLX5E_MAX_PRIORITY]; + u32 *buffer_size = NULL; + u8 *prio2buffer = NULL; + u32 changed = 0; + int i, err; + + if (!MLX5_BUFFER_SUPPORTED(mdev)) + return -EOPNOTSUPP; + + for (i = 0; i < DCBX_MAX_BUFFERS; i++) + mlx5_core_dbg(mdev, "buffer[%d]=%d\n", i, dcb_buffer->buffer_size[i]); + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) + mlx5_core_dbg(mdev, "priority %d buffer%d\n", i, dcb_buffer->prio2buffer[i]); + + err = mlx5e_port_query_priority2buffer(mdev, old_prio2buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_PRIORITY; i++) { + if (dcb_buffer->prio2buffer[i] != old_prio2buffer[i]) { + changed |= MLX5E_PORT_BUFFER_PRIO2BUFFER; + prio2buffer = dcb_buffer->prio2buffer; + break; + } + } + + err = mlx5e_port_query_buffer(priv, &port_buffer); + if (err) + return err; + + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { + if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) { + changed |= MLX5E_PORT_BUFFER_SIZE; + buffer_size = dcb_buffer->buffer_size; + break; + } + } + + if (!changed) + return 0; + + priv->dcbx.manual_buffer = true; + err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL, + buffer_size, prio2buffer); + return err; +} + +static const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { + .ieee_getets = mlx5e_dcbnl_ieee_getets, + .ieee_setets = mlx5e_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc, + .ieee_setapp = mlx5e_dcbnl_ieee_setapp, + .ieee_delapp = mlx5e_dcbnl_ieee_delapp, + .getdcbx = mlx5e_dcbnl_getdcbx, + .setdcbx = mlx5e_dcbnl_setdcbx, + .dcbnl_getbuffer = mlx5e_dcbnl_getbuffer, + .dcbnl_setbuffer = mlx5e_dcbnl_setbuffer, + +/* CEE interfaces */ + .setall = mlx5e_dcbnl_setall, + .getstate = mlx5e_dcbnl_getstate, + .getpermhwaddr = mlx5e_dcbnl_getpermhwaddr, + + .setpgtccfgtx = mlx5e_dcbnl_setpgtccfgtx, + .setpgbwgcfgtx = mlx5e_dcbnl_setpgbwgcfgtx, + .getpgtccfgtx = mlx5e_dcbnl_getpgtccfgtx, + .getpgbwgcfgtx = mlx5e_dcbnl_getpgbwgcfgtx, + + .setpfccfg = mlx5e_dcbnl_setpfccfg, + .getpfccfg = mlx5e_dcbnl_getpfccfg, + .getcap = mlx5e_dcbnl_getcap, + .getnumtcs = mlx5e_dcbnl_getnumtcs, + .getpfcstate = mlx5e_dcbnl_getpfcstate, + .setpfcstate = mlx5e_dcbnl_setpfcstate, +}; + +void mlx5e_dcbnl_build_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +} + +static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv, + enum mlx5_dcbx_oper_mode *mode) +{ + u32 out[MLX5_ST_SZ_DW(dcbx_param)]; + + *mode = MLX5E_DCBX_PARAM_VER_OPER_HOST; + + if (!mlx5_query_port_dcbx_param(priv->mdev, out)) + *mode = MLX5_GET(dcbx_param, out, version_oper); + + /* From driver's point of view, we only care if the mode + * is host (HOST) or non-host (AUTO) + */ + if (*mode != MLX5E_DCBX_PARAM_VER_OPER_HOST) + *mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO; +} + +static void mlx5e_ets_init(struct mlx5e_priv *priv) +{ + struct ieee_ets ets; + int err; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, ets)) + return; + + memset(&ets, 0, sizeof(ets)); + ets.ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < ets.ets_cap; i++) { + ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; + ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; + ets.prio_tc[i] = i; + } + + if (ets.ets_cap > 1) { + /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ + ets.prio_tc[0] = 1; + ets.prio_tc[1] = 0; + } + + err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); + if (err) + netdev_err(priv->netdev, + "%s, Failed to init ETS: %d\n", __func__, err); +} + +enum { + INIT, + DELETE, +}; + +static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action) +{ + struct dcb_app temp; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return; + + /* No SEL_DSCP entry in non DSCP state */ + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_DSCP) + return; + + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + for (i = 0; i < MLX5E_MAX_DSCP; i++) { + temp.protocol = i; + temp.priority = priv->dcbx_dp.dscp2prio[i]; + if (action == INIT) + dcb_ieee_setapp(priv->netdev, &temp); + else + dcb_ieee_delapp(priv->netdev, &temp); + } + + priv->dcbx.dscp_app_cnt = (action == INIT) ? MLX5E_MAX_DSCP : 0; +} + +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, INIT); +} + +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, DELETE); +} + +static void mlx5e_params_calc_trust_tx_min_inline_mode(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u8 trust_state) +{ + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); + if (trust_state == MLX5_QPTS_TRUST_DSCP && + params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) + params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; +} + +static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) +{ + u8 *trust_state = context; + int err; + + err = mlx5_set_trust_state(priv->mdev, *trust_state); + if (err) + return err; + WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state); + + return 0; +} + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) +{ + struct mlx5e_params new_params; + bool reset = true; + int err; + + mutex_lock(&priv->state_lock); + + new_params = priv->channels.params; + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_params, + trust_state); + + /* Skip if tx_min_inline is the same */ + if (new_params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) + reset = false; + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_update_trust_state_hw, + &trust_state, reset); + + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio) +{ + int err; + + err = mlx5_set_dscp2prio(priv->mdev, dscp, prio); + if (err) + return err; + + priv->dcbx_dp.dscp2prio[dscp] = prio; + return err; +} + +static int mlx5e_trust_initialize(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 trust_state; + int err; + + if (!MLX5_DSCP_SUPPORTED(mdev)) { + WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP); + return 0; + } + + err = mlx5_query_trust_state(priv->mdev, &trust_state); + if (err) + return err; + WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state); + + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) { + /* + * Align the driver state with the register state. + * Temporary state change is required to enable the app list reset. + */ + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP; + mlx5e_dcbnl_delete_app(priv); + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; + } + + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, + priv->dcbx_dp.trust_state); + + err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio); + if (err) + return err; + + return 0; +} + +#define MLX5E_BUFFER_CELL_SHIFT 7 + +static u16 mlx5e_query_port_buffers_cell_size(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(sbcam_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbcam_reg)] = {}; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return (1 << MLX5E_BUFFER_CELL_SHIFT); + + if (mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_SBCAM, 0, 0)) + return (1 << MLX5E_BUFFER_CELL_SHIFT); + + return MLX5_GET(sbcam_reg, out, cap_cell_size); +} + +void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) +{ + struct mlx5e_dcbx *dcbx = &priv->dcbx; + + mlx5e_trust_initialize(priv); + + if (!MLX5_CAP_GEN(priv->mdev, qos)) + return; + + if (MLX5_CAP_GEN(priv->mdev, dcbx)) + mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode); + + priv->dcbx.cap = DCB_CAP_DCBX_VER_CEE | + DCB_CAP_DCBX_VER_IEEE; + if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST) + priv->dcbx.cap |= DCB_CAP_DCBX_HOST; + + priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv); + priv->dcbx.manual_buffer = false; + priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN; + + mlx5e_ets_init(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c new file mode 100644 index 0000000000..ca9cfbf57d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "en.h" + +static void +mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, + struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) +{ + mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); + dim->state = DIM_START_MEASURE; +} + +void mlx5e_rx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); + struct dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); +} + +void mlx5e_tx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); + struct dim_cq_moder cur_moder = + net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c new file mode 100644 index 0000000000..38263d5c98 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -0,0 +1,2451 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "en.h" +#include "en/port.h" +#include "en/params.h" +#include "en/ptp.h" +#include "lib/clock.h" +#include "en/fs_ethtool.h" + +void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int count; + + strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count >= sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); + + strscpy(drvinfo->bus_info, dev_name(mdev->device), + sizeof(drvinfo->bus_info)); +} + +static void mlx5e_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); +} + +struct ptys2ethtool_config { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); +}; + +static +struct ptys2ethtool_config ptys2legacy_ethtool_table[MLX5E_LINK_MODES_NUMBER]; +static +struct ptys2ethtool_config ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER]; + +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, table, ...) \ + ({ \ + struct ptys2ethtool_config *cfg; \ + const unsigned int modes[] = { __VA_ARGS__ }; \ + unsigned int i, bit, idx; \ + cfg = &ptys2##table##_ethtool_table[reg_]; \ + bitmap_zero(cfg->supported, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + bitmap_zero(cfg->advertised, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ + bit = modes[i] % 64; \ + idx = modes[i] / 64; \ + __set_bit(bit, &cfg->supported[idx]); \ + __set_bit(bit, &cfg->advertised[idx]); \ + } \ + }) + +void mlx5e_build_ptys2ethtool_map(void) +{ + memset(ptys2legacy_ethtool_table, 0, sizeof(ptys2legacy_ethtool_table)); + memset(ptys2ext_ethtool_table, 0, sizeof(ptys2ext_ethtool_table)); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, legacy, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, legacy, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, legacy, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, legacy, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, legacy, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, legacy, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, legacy, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, legacy, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, legacy, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, legacy, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, legacy, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, legacy, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, legacy, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, legacy, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, legacy, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, legacy, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, legacy, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, legacy, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, legacy, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, legacy, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, legacy, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, legacy, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_SGMII_100M, ext, + ETHTOOL_LINK_MODE_100baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_X_SGMII, ext, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_5GBASE_R, ext, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_XFI_XAUI_1, ext, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_XLAUI_4_XLPPI_4, ext, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GAUI_1_25GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, + ext, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_CAUI_4_100GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_2_100GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_4_200GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_100000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_2_200GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_4_400GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT); +} + +static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, + struct ptys2ethtool_config **arr, + u32 *size) +{ + bool ext = mlx5_ptys_ext_supported(mdev); + + *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); +} + +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +struct pflag_desc { + char name[ETH_GSTRING_LEN]; + mlx5e_pflag_handler handler; +}; + +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS]; + +int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return mlx5e_stats_total_num(priv); + case ETH_SS_PRIV_FLAGS: + return MLX5E_NUM_PFLAGS; + case ETH_SS_TEST: + return mlx5e_self_test_num(priv); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) +{ + int i; + + switch (stringset) { + case ETH_SS_PRIV_FLAGS: + for (i = 0; i < MLX5E_NUM_PFLAGS; i++) + strcpy(data + i * ETH_GSTRING_LEN, + mlx5e_priv_flags[i].name); + break; + + case ETH_SS_TEST: + mlx5e_self_test_fill_strings(priv, data); + break; + + case ETH_SS_STATS: + mlx5e_stats_fill_strings(priv, data); + break; + } +} + +static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, + struct ethtool_stats *stats, u64 *data) +{ + int idx = 0; + + mutex_lock(&priv->state_lock); + mlx5e_stats_update(priv); + mutex_unlock(&priv->state_lock); + + mlx5e_stats_fill(priv, data, idx); +} + +static void mlx5e_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param) +{ + /* Limitation for regular RQ. XSK RQ may clamp the queue length in + * mlx5e_mpwqe_get_log_rq_size. + */ + u8 max_log_mpwrq_pkts = mlx5e_mpwrq_max_log_rq_pkts(priv->mdev, + PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED); + + param->rx_max_pending = 1 << min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE, + max_log_mpwrq_pkts); + param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; + param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; + param->tx_pending = 1 << priv->channels.params.log_sq_size; + + kernel_param->tcp_data_split = + (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ? + ETHTOOL_TCP_DATA_SPLIT_ENABLED : + ETHTOOL_TCP_DATA_SPLIT_DISABLED; +} + +static void mlx5e_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); +} + +int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, + struct ethtool_ringparam *param) +{ + struct mlx5e_params new_params; + u8 log_rq_size; + u8 log_sq_size; + int err = 0; + + if (param->rx_jumbo_pending) { + netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_mini_pending) { + netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n", + __func__); + return -EINVAL; + } + + if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { + netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + + log_rq_size = order_base_2(param->rx_pending); + log_sq_size = order_base_2(param->tx_pending); + + if (log_rq_size == priv->channels.params.log_rq_mtu_frames && + log_sq_size == priv->channels.params.log_sq_size) + return 0; + + mutex_lock(&priv->state_lock); + + new_params = priv->channels.params; + new_params.log_rq_mtu_frames = log_rq_size; + new_params.log_sq_size = log_sq_size; + + err = mlx5e_validate_params(priv->mdev, &new_params); + if (err) + goto unlock; + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ + mutex_lock(&priv->state_lock); + ch->max_combined = priv->max_nch; + ch->combined_count = priv->channels.params.num_channels; + mutex_unlock(&priv->state_lock); +} + +static void mlx5e_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, + struct ethtool_channels *ch) +{ + struct mlx5e_params *cur_params = &priv->channels.params; + unsigned int count = ch->combined_count; + struct mlx5e_params new_params; + bool arfs_enabled; + int rss_cnt; + bool opened; + int err = 0; + + if (!count) { + netdev_info(priv->netdev, "%s: combined_count=0 not supported\n", + __func__); + return -EINVAL; + } + + if (cur_params->num_channels == count) + return 0; + + mutex_lock(&priv->state_lock); + + /* Don't allow changing the number of channels if HTB offload is active, + * because the numeration of the QoS SQs will change, while per-queue + * qdiscs are attached. + */ + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + + /* Don't allow changing the number of channels if non-default RSS contexts exist, + * the kernel doesn't protect against set_channels operations that break them. + */ + rss_cnt = mlx5e_rx_res_rss_cnt(priv->rx_res) - 1; + if (rss_cnt) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: Non-default RSS contexts exist (%d), cannot change the number of channels\n", + __func__, rss_cnt); + goto out; + } + + /* Don't allow changing the number of channels if MQPRIO mode channel offload is active, + * because it defines a partition over the channels queues. + */ + if (cur_params->mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: MQPRIO mode channel offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + + new_params = *cur_params; + new_params.num_channels = count; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + + arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE); + if (arfs_enabled) + mlx5e_arfs_disable(priv->fs); + + /* Switch to new channels, set new parameters and close old ones */ + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); + + if (arfs_enabled) { + int err2 = mlx5e_arfs_enable(priv->fs); + + if (err2) + netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", + __func__, err2); + } + +out: + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_channels(priv, ch); +} + +int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal) +{ + struct dim_cq_moder *rx_moder, *tx_moder; + + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) + return -EOPNOTSUPP; + + rx_moder = &priv->channels.params.rx_cq_moderation; + coal->rx_coalesce_usecs = rx_moder->usec; + coal->rx_max_coalesced_frames = rx_moder->pkts; + coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; + + tx_moder = &priv->channels.params.tx_cq_moderation; + coal->tx_coalesce_usecs = tx_moder->usec; + coal->tx_max_coalesced_frames = tx_moder->pkts; + coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled; + + kernel_coal->use_cqe_mode_rx = + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER); + kernel_coal->use_cqe_mode_tx = + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_CQE_BASED_MODER); + + return 0; +} + +static int mlx5e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); +} + +#define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD +#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT + +static void +mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int tc; + int i; + + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; + + for (tc = 0; tc < c->num_tc; tc++) { + mlx5_core_modify_cq_moderation(mdev, + &c->sq[tc].cq.mcq, + coal->tx_coalesce_usecs, + coal->tx_max_coalesced_frames); + } + } +} + +static void +mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i; + + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; + + mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, + coal->rx_coalesce_usecs, + coal->rx_max_coalesced_frames); + } +} + +/* convert a boolean value of cq_mode to mlx5 period mode + * true : MLX5_CQ_PERIOD_MODE_START_FROM_CQE + * false : MLX5_CQ_PERIOD_MODE_START_FROM_EQE + */ +static int cqe_mode_to_period_mode(bool val) +{ + return val ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; +} + +int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct dim_cq_moder *rx_moder, *tx_moder; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; + bool reset_rx, reset_tx; + bool reset = true; + u8 cq_period_mode; + int err = 0; + + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + return -EOPNOTSUPP; + + if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME || + coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) { + netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n", + __func__, MLX5E_MAX_COAL_TIME); + return -ERANGE; + } + + if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES || + coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) { + netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n", + __func__, MLX5E_MAX_COAL_FRAMES); + return -ERANGE; + } + + if ((kernel_coal->use_cqe_mode_rx || kernel_coal->use_cqe_mode_tx) && + !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) { + NL_SET_ERR_MSG_MOD(extack, "cqe_mode_rx/tx is not supported on this device"); + return -EOPNOTSUPP; + } + + mutex_lock(&priv->state_lock); + new_params = priv->channels.params; + + rx_moder = &new_params.rx_cq_moderation; + rx_moder->usec = coal->rx_coalesce_usecs; + rx_moder->pkts = coal->rx_max_coalesced_frames; + new_params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + + tx_moder = &new_params.tx_cq_moderation; + tx_moder->usec = coal->tx_coalesce_usecs; + tx_moder->pkts = coal->tx_max_coalesced_frames; + new_params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; + + reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; + reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; + + cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_rx); + if (cq_period_mode != rx_moder->cq_period_mode) { + mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); + reset_rx = true; + } + + cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_tx); + if (cq_period_mode != tx_moder->cq_period_mode) { + mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); + reset_tx = true; + } + + if (reset_rx) { + u8 mode = MLX5E_GET_PFLAG(&new_params, + MLX5E_PFLAG_RX_CQE_BASED_MODER); + + mlx5e_reset_rx_moderation(&new_params, mode); + } + if (reset_tx) { + u8 mode = MLX5E_GET_PFLAG(&new_params, + MLX5E_PFLAG_TX_CQE_BASED_MODER); + + mlx5e_reset_tx_moderation(&new_params, mode); + } + + /* If DIM state hasn't changed, it's possible to modify interrupt + * moderation parameters on the fly, even if the channels are open. + */ + if (!reset_rx && !reset_tx && test_bit(MLX5E_STATE_OPENED, &priv->state)) { + if (!coal->use_adaptive_rx_coalesce) + mlx5e_set_priv_channels_rx_coalesce(priv, coal); + if (!coal->use_adaptive_tx_coalesce) + mlx5e_set_priv_channels_tx_coalesce(priv, coal); + reset = false; + } + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); + + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); +} + +static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, + unsigned long *supported_modes, + u32 eth_proto_cap) +{ + unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; + int proto; + + mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + for_each_set_bit(proto, &proto_cap, max_size) + bitmap_or(supported_modes, supported_modes, + table[proto].supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap, bool ext) +{ + unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; + int proto; + + table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); + + for_each_set_bit(proto, &proto_cap, max_size) + bitmap_or(advertising_modes, advertising_modes, + table[proto].advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static const u32 pplm_fec_2_ethtool[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF, + [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER, + [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, + [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS, + [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS, +}; + +static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) +{ + int mode = 0; + + if (!fec_mode) + return ETHTOOL_FEC_AUTO; + + mode = find_first_bit(&fec_mode, size); + + if (mode < ARRAY_SIZE(pplm_fec_2_ethtool)) + return pplm_fec_2_ethtool[mode]; + + return 0; +} + +#define MLX5E_ADVERTISE_SUPPORTED_FEC(mlx5_fec, ethtool_fec) \ + do { \ + if (mlx5e_fec_in_caps(dev, 1 << (mlx5_fec))) \ + __set_bit(ethtool_fec, \ + link_ksettings->link_modes.supported);\ + } while (0) + +static const u32 pplm_fec_2_ethtool_linkmodes[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_LINK_MODE_FEC_NONE_BIT, + [MLX5E_FEC_FIRECODE] = ETHTOOL_LINK_MODE_FEC_BASER_BIT, + [MLX5E_FEC_RS_528_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT, + [MLX5E_FEC_RS_544_514] = ETHTOOL_LINK_MODE_FEC_RS_BIT, + [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_LINK_MODE_FEC_LLRS_BIT, +}; + +static int get_fec_supported_advertised(struct mlx5_core_dev *dev, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long active_fec_long; + u32 active_fec; + u32 bitn; + int err; + + err = mlx5e_get_fec_mode(dev, &active_fec, NULL); + if (err) + return (err == -EOPNOTSUPP) ? 0 : err; + + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_NOFEC, + ETHTOOL_LINK_MODE_FEC_NONE_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_FIRECODE, + ETHTOOL_LINK_MODE_FEC_BASER_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_RS_528_514, + ETHTOOL_LINK_MODE_FEC_RS_BIT); + MLX5E_ADVERTISE_SUPPORTED_FEC(MLX5E_FEC_LLRS_272_257_1, + ETHTOOL_LINK_MODE_FEC_LLRS_BIT); + + active_fec_long = active_fec; + /* active fec is a bit set, find out which bit is set and + * advertise the corresponding ethtool bit + */ + bitn = find_first_bit(&active_fec_long, sizeof(active_fec_long) * BITS_PER_BYTE); + if (bitn < ARRAY_SIZE(pplm_fec_2_ethtool_linkmodes)) + __set_bit(pplm_fec_2_ethtool_linkmodes[bitn], + link_ksettings->link_modes.advertising); + + return 0; +} + +static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev, + struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap, u8 connector_type) +{ + if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) { + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, + FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + FIBRE); + } + + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, + Backplane); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, + Backplane); + } + return; + } + + switch (connector_type) { + case MLX5E_PORT_TP: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, TP); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, TP); + break; + case MLX5E_PORT_AUI: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, AUI); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, AUI); + break; + case MLX5E_PORT_BNC: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, BNC); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, BNC); + break; + case MLX5E_PORT_MII: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, MII); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, MII); + break; + case MLX5E_PORT_FIBRE: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, FIBRE); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, FIBRE); + break; + case MLX5E_PORT_DA: + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Backplane); + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Backplane); + break; + case MLX5E_PORT_NONE: + case MLX5E_PORT_OTHER: + default: + break; + } +} + +static void get_speed_duplex(struct net_device *netdev, + u32 eth_proto_oper, bool force_legacy, + u16 data_rate_oper, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + + if (!netif_carrier_ok(netdev)) + goto out; + + speed = mlx5_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); + if (!speed) { + if (data_rate_oper) + speed = 100 * data_rate_oper; + else + speed = SPEED_UNKNOWN; + goto out; + } + + duplex = DUPLEX_FULL; + +out: + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = duplex; +} + +static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long *supported = link_ksettings->link_modes.supported; + ptys2ethtool_supported_link(mdev, supported, eth_proto_cap); + + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); +} + +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings, + bool ext) +{ + unsigned long *advertising = link_ksettings->link_modes.advertising; + ptys2ethtool_adver_link(advertising, eth_proto_cap, ext); + + if (rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); +} + +static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = { + [MLX5E_PORT_UNKNOWN] = PORT_OTHER, + [MLX5E_PORT_NONE] = PORT_NONE, + [MLX5E_PORT_TP] = PORT_TP, + [MLX5E_PORT_AUI] = PORT_AUI, + [MLX5E_PORT_BNC] = PORT_BNC, + [MLX5E_PORT_MII] = PORT_MII, + [MLX5E_PORT_FIBRE] = PORT_FIBRE, + [MLX5E_PORT_DA] = PORT_DA, + [MLX5E_PORT_OTHER] = PORT_OTHER, + }; + +static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type) +{ + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) + return ptys2connector_type[connector_type]; + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; + } + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | + MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; + } + + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; + } + + return PORT_OTHER; +} + +static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, + struct ethtool_link_ksettings *link_ksettings) +{ + unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + bool ext = mlx5_ptys_ext_supported(mdev); + + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); +} + +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; + u32 eth_proto_admin; + u8 an_disable_admin; + u16 data_rate_oper; + u32 eth_proto_oper; + u32 eth_proto_cap; + u8 connector_type; + u32 rx_pause = 0; + u32 tx_pause = 0; + u32 eth_proto_lp; + bool admin_ext; + u8 an_status; + bool ext; + int err; + + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + if (err) { + netdev_err(priv->netdev, "%s: query port ptys failed: %d\n", + __func__, err); + goto err_query_regs; + } + ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); + eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_admin); + /* Fields: eth_proto_admin and ext_eth_proto_admin are + * mutually exclusive. Hence try reading legacy advertising + * when extended advertising is zero. + * admin_ext indicates which proto_admin (ext vs. legacy) + * should be read and interpreted + */ + admin_ext = ext; + if (ext && !eth_proto_admin) { + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_admin); + admin_ext = false; + } + + eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, admin_ext, + eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); + connector_type = MLX5_GET(ptys_reg, out, connector_type); + data_rate_oper = MLX5_GET(ptys_reg, out, data_rate_oper); + + mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + get_supported(mdev, eth_proto_cap, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, + admin_ext); + get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext, + data_rate_oper, link_ksettings); + + eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; + connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ? + connector_type : MLX5E_PORT_UNKNOWN; + link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type); + ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin, + connector_type); + get_lp_advertising(mdev, eth_proto_lp, link_ksettings); + + if (an_status == MLX5_AN_COMPLETE) + ethtool_link_ksettings_add_link_mode(link_ksettings, + lp_advertising, Autoneg); + + link_ksettings->base.autoneg = an_disable_admin ? AUTONEG_DISABLE : + AUTONEG_ENABLE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Autoneg); + + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) { + netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n", + __func__, err); + err = 0; /* don't fail caps query because of FEC error */ + } + + if (!an_disable_admin) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); + +err_query_regs: + return err; +} + +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); +} + +static int mlx5e_speed_validate(struct net_device *netdev, bool ext, + const unsigned long link_modes, u8 autoneg) +{ + /* Extended link-mode has no speed limitations. */ + if (ext) + return 0; + + if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + return -EINVAL; + } + return 0; +} + +static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) +{ + u32 i, ptys_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (*ptys2legacy_ethtool_table[i].advertised == 0) + continue; + if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised, + link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS)) + ptys_modes |= MLX5E_PROT_MASK(i); + } + + return ptys_modes; +} + +static u32 mlx5e_ethtool2ptys_ext_adver_link(const unsigned long *link_modes) +{ + u32 i, ptys_modes = 0; + unsigned long modes[2]; + + for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) { + if (ptys2ext_ethtool_table[i].advertised[0] == 0 && + ptys2ext_ethtool_table[i].advertised[1] == 0) + continue; + memset(modes, 0, sizeof(modes)); + bitmap_and(modes, ptys2ext_ethtool_table[i].advertised, + link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); + + if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] && + modes[1] == ptys2ext_ethtool_table[i].advertised[1]) + ptys_modes |= MLX5E_PROT_MASK(i); + } + return ptys_modes; +} + +static bool ext_link_mode_requested(const unsigned long *adver) +{ +#define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT + int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT; + __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,}; + + bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size); + return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static bool ext_requested(u8 autoneg, const unsigned long *adver, bool ext_supported) +{ + bool ext_link_mode = ext_link_mode_requested(adver); + + return autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_supported; +} + +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_port_eth_proto eproto; + const unsigned long *adver; + bool an_changes = false; + u8 an_disable_admin; + bool ext_supported; + u8 an_disable_cap; + bool an_disable; + u32 link_modes; + u8 an_status; + u8 autoneg; + u32 speed; + bool ext; + int err; + + u32 (*ethtool2ptys_adver_func)(const unsigned long *adver); + + adver = link_ksettings->link_modes.advertising; + autoneg = link_ksettings->base.autoneg; + speed = link_ksettings->base.speed; + + ext_supported = mlx5_ptys_ext_supported(mdev); + ext = ext_requested(autoneg, adver, ext_supported); + if (!ext_supported && ext) + return -EOPNOTSUPP; + + ethtool2ptys_adver_func = ext ? mlx5e_ethtool2ptys_ext_adver_link : + mlx5e_ethtool2ptys_adver_link; + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); + if (err) { + netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", + __func__, err); + goto out; + } + link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : + mlx5_port_speed2linkmodes(mdev, speed, !ext); + + err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); + if (err) + goto out; + + link_modes = link_modes & eproto.cap; + if (!link_modes) { + netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", + __func__); + err = -EINVAL; + goto out; + } + + mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap, + &an_disable_admin); + + an_disable = autoneg == AUTONEG_DISABLE; + an_changes = ((!an_disable && an_disable_admin) || + (an_disable && !an_disable_admin)); + + if (!an_changes && link_modes == eproto.admin) + goto out; + + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + mlx5_toggle_port_link(mdev); + +out: + return err; +} + +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); +} + +u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) +{ + return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key); +} + +static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_key_size(priv); +} + +u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv) +{ + return MLX5E_INDIR_RQT_SIZE; +} + +static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_indir_size(priv); +} + +static int mlx5e_get_rxfh_context(struct net_device *dev, u32 *indir, + u8 *key, u8 *hfunc, u32 rss_context) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, indir, key, hfunc); + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_set_rxfh_context(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc, + u32 *rss_context, bool delete) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + if (delete) { + err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context); + goto unlock; + } + + if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { + unsigned int count = priv->channels.params.num_channels; + + err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count); + if (err) + goto unlock; + } + + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, indir, key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + return mlx5e_get_rxfh_context(netdev, indir, key, hfunc, 0); +} + +int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, 0, indir, key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); + mutex_unlock(&priv->state_lock); + return err; +} + +#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 +#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000 +#define MLX5E_PFC_PREVEN_MINOR_PRECENT 85 +#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC 80 +#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \ + max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \ + (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100) + +static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev, + u16 *pfc_prevention_tout) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) || + !MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + return -EOPNOTSUPP; + + return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL); +} + +static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev, + u16 pfc_preven) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 critical_tout; + u16 minor; + + if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) || + !MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + return -EOPNOTSUPP; + + critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ? + MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC : + pfc_preven; + + if (critical_tout != PFC_STORM_PREVENTION_DISABLE && + (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC || + critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) { + netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n", + __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, + MLX5E_PFC_PREVEN_TOUT_MAX_MSEC); + return -EINVAL; + } + + minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout); + return mlx5_set_port_stall_watermark(mdev, critical_tout, + minor); +} + +static int mlx5e_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + void *data) +{ + int err; + + switch (tuna->id) { + case ETHTOOL_PFC_PREVENTION_TOUT: + err = mlx5e_get_pfc_prevention_tout(dev, data); + break; + default: + err = -EINVAL; + break; + } + + return err; +} + +static int mlx5e_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + + switch (tuna->id) { + case ETHTOOL_PFC_PREVENTION_TOUT: + err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data); + break; + default: + err = -EINVAL; + break; + } + + mutex_unlock(&priv->state_lock); + return err; +} + +static void mlx5e_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_pause_get(priv, pause_stats); +} + +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause, + &pauseparam->tx_pause); + if (err) { + netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n", + __func__, err); + } +} + +static void mlx5e_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_ethtool_get_pauseparam(priv, pauseparam); +} + +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + + if (pauseparam->autoneg) + return -EINVAL; + + err = mlx5_set_port_pause(mdev, + pauseparam->rx_pause ? 1 : 0, + pauseparam->tx_pause ? 1 : 0); + if (err) { + netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n", + __func__, err); + } + + return err; +} + +static int mlx5e_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_pauseparam(priv, pauseparam); +} + +int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, + struct ethtool_ts_info *info) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + info->phc_index = mlx5_clock_get_ptp_index(mdev); + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || + info->phc_index == -1) + return 0; + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} + +static int mlx5e_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + +static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev) +{ + __u32 ret = 0; + + if (MLX5_CAP_GEN(mdev, wol_g)) + ret |= WAKE_MAGIC; + + if (MLX5_CAP_GEN(mdev, wol_s)) + ret |= WAKE_MAGICSECURE; + + if (MLX5_CAP_GEN(mdev, wol_a)) + ret |= WAKE_ARP; + + if (MLX5_CAP_GEN(mdev, wol_b)) + ret |= WAKE_BCAST; + + if (MLX5_CAP_GEN(mdev, wol_m)) + ret |= WAKE_MCAST; + + if (MLX5_CAP_GEN(mdev, wol_u)) + ret |= WAKE_UCAST; + + if (MLX5_CAP_GEN(mdev, wol_p)) + ret |= WAKE_PHY; + + return ret; +} + +static __u32 mlx5e_reformat_wol_mode_mlx5_to_linux(u8 mode) +{ + __u32 ret = 0; + + if (mode & MLX5_WOL_MAGIC) + ret |= WAKE_MAGIC; + + if (mode & MLX5_WOL_SECURED_MAGIC) + ret |= WAKE_MAGICSECURE; + + if (mode & MLX5_WOL_ARP) + ret |= WAKE_ARP; + + if (mode & MLX5_WOL_BROADCAST) + ret |= WAKE_BCAST; + + if (mode & MLX5_WOL_MULTICAST) + ret |= WAKE_MCAST; + + if (mode & MLX5_WOL_UNICAST) + ret |= WAKE_UCAST; + + if (mode & MLX5_WOL_PHY_ACTIVITY) + ret |= WAKE_PHY; + + return ret; +} + +static u8 mlx5e_reformat_wol_mode_linux_to_mlx5(__u32 mode) +{ + u8 ret = 0; + + if (mode & WAKE_MAGIC) + ret |= MLX5_WOL_MAGIC; + + if (mode & WAKE_MAGICSECURE) + ret |= MLX5_WOL_SECURED_MAGIC; + + if (mode & WAKE_ARP) + ret |= MLX5_WOL_ARP; + + if (mode & WAKE_BCAST) + ret |= MLX5_WOL_BROADCAST; + + if (mode & WAKE_MCAST) + ret |= MLX5_WOL_MULTICAST; + + if (mode & WAKE_UCAST) + ret |= MLX5_WOL_UNICAST; + + if (mode & WAKE_PHY) + ret |= MLX5_WOL_PHY_ACTIVITY; + + return ret; +} + +static void mlx5e_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mlx5_wol_mode; + int err; + + memset(wol, 0, sizeof(*wol)); + + wol->supported = mlx5e_get_wol_supported(mdev); + if (!wol->supported) + return; + + err = mlx5_query_port_wol(mdev, &mlx5_wol_mode); + if (err) + return; + + wol->wolopts = mlx5e_reformat_wol_mode_mlx5_to_linux(mlx5_wol_mode); +} + +static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + __u32 wol_supported = mlx5e_get_wol_supported(mdev); + u32 mlx5_wol_mode; + + if (!wol_supported) + return -EOPNOTSUPP; + + if (wol->wolopts & ~wol_supported) + return -EINVAL; + + mlx5_wol_mode = mlx5e_reformat_wol_mode_linux_to_mlx5(wol->wolopts); + + return mlx5_set_port_wol(mdev, mlx5_wol_mode); +} + +static void mlx5e_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *fec_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_fec_get(priv, fec_stats); +} + +static int mlx5e_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 fec_configured; + u32 fec_active; + int err; + + err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured); + + if (err) + return err; + + fecparam->active_fec = pplm2ethtool_fec((unsigned long)fec_active, + sizeof(unsigned long) * BITS_PER_BYTE); + + if (!fecparam->active_fec) + return -EOPNOTSUPP; + + fecparam->fec = pplm2ethtool_fec((unsigned long)fec_configured, + sizeof(unsigned long) * BITS_PER_BYTE); + + return 0; +} + +static int mlx5e_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + unsigned long fec_bitmap; + u16 fec_policy = 0; + int mode; + int err; + + bitmap_from_arr32(&fec_bitmap, &fecparam->fec, sizeof(fecparam->fec) * BITS_PER_BYTE); + if (bitmap_weight(&fec_bitmap, ETHTOOL_FEC_LLRS_BIT + 1) > 1) + return -EOPNOTSUPP; + + for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) { + if (!(pplm_fec_2_ethtool[mode] & fecparam->fec)) + continue; + fec_policy |= (1 << mode); + break; + } + + err = mlx5e_set_fec_mode(mdev, fec_policy); + + if (err) + return err; + + mlx5_toggle_port_link(mdev); + + return 0; +} + +static int mlx5e_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 beacon_duration; + + if (!MLX5_CAP_GEN(mdev, beacon_led)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = MLX5_BEACON_DURATION_INF; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = MLX5_BEACON_DURATION_OFF; + break; + default: + return -EOPNOTSUPP; + } + + return mlx5_set_port_beacon(mdev, beacon_duration); +} + +static int mlx5e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; + int size_read = 0; + u8 data[4] = {0}; + + size_read = mlx5_query_module_eeprom(dev, 0, 2, data); + if (size_read < 2) + return -EIO; + + /* data[0] = identifier byte */ + switch (data[0]) { + case MLX5_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; + break; + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + /* data[1] = revision id */ + if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; + } + break; + case MLX5_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", + __func__, data[0]); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int offset = ee->offset; + int size_read; + int i = 0; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i, + data + i); + + if (!size_read) + /* Done reading */ + return 0; + + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", + __func__, size_read); + return size_read; + } + + i += size_read; + offset += size_read; + } + + return 0; +} + +static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_module_eeprom_query_params query; + struct mlx5_core_dev *mdev = priv->mdev; + u8 *data = page_data->data; + int size_read; + int i = 0; + + if (!page_data->length) + return -EINVAL; + + memset(data, 0, page_data->length); + + query.offset = page_data->offset; + query.i2c_address = page_data->i2c_address; + query.bank = page_data->bank; + query.page = page_data->page; + while (i < page_data->length) { + query.size = page_data->length - i; + size_read = mlx5_query_module_eeprom_by_page(mdev, &query, data + i); + + /* Done reading, return how many bytes was read */ + if (!size_read) + return i; + + if (size_read == -EINVAL) + return -EINVAL; + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_module_eeprom_by_page failed:0x%x\n", + __func__, size_read); + return i; + } + + i += size_read; + query.offset += size_read; + } + + return i; +} + +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw, NULL); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + +static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, + bool is_rx_cq) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode, current_cq_period_mode; + struct mlx5e_params new_params; + + if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) + return -EOPNOTSUPP; + + cq_period_mode = cqe_mode_to_period_mode(enable); + + current_cq_period_mode = is_rx_cq ? + priv->channels.params.rx_cq_moderation.cq_period_mode : + priv->channels.params.tx_cq_moderation.cq_period_mode; + + if (cq_period_mode == current_cq_period_mode) + return 0; + + new_params = priv->channels.params; + if (is_rx_cq) + mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); + else + mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); + + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); +} + +static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, false); +} + +static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, true); +} + +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val, bool rx_filter) +{ + bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); + struct mlx5e_params new_params; + int err = 0; + + if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) + return new_val ? -EOPNOTSUPP : 0; + + if (curr_val == new_val) + return 0; + + if (new_val && !mlx5e_profile_feature_cap(priv->profile, PTP_RX) && rx_filter) { + netdev_err(priv->netdev, + "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n"); + return -EINVAL; + } + + if (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + netdev_warn(priv->netdev, "Can't set CQE compression with HW-GRO, disable it first.\n"); + return -EINVAL; + } + + new_params = priv->channels.params; + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + if (rx_filter) + new_params.ptp_rx = new_val; + + if (new_params.ptp_rx == priv->channels.params.ptp_rx) + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + else + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); + if (err) + return err; + + netdev_dbg(priv->netdev, "MLX5E: RxCqeCmprss was turned %s\n", + MLX5E_GET_PFLAG(&priv->channels.params, + MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); + + return 0; +} + +static int set_pflag_rx_cqe_compress(struct net_device *netdev, + bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + bool rx_filter; + int err; + + if (!MLX5_CAP_GEN(mdev, cqe_compression)) + return -EOPNOTSUPP; + + rx_filter = priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE; + err = mlx5e_modify_rx_cqe_compression_locked(priv, enable, rx_filter); + if (err) + return err; + + priv->channels.params.rx_cqe_compress_def = enable; + + return 0; +} + +static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; + int err; + + if (enable) { + /* Checking the regular RQ here; mlx5e_validate_xsk_param called + * from mlx5e_open_xsk will check for each XSK queue, and + * mlx5e_safe_switch_params will be reverted if any check fails. + */ + int err = mlx5e_mpwrq_validate_regular(mdev, &priv->channels.params); + + if (err) + return err; + } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n"); + return -EINVAL; + } + + new_params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); + mlx5e_set_rq_type(mdev, &new_params); + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + if (err) + return err; + + /* update XDP supported features */ + mlx5e_set_xdp_feature(netdev); + + return 0; +} + +static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *channels = &priv->channels; + struct mlx5e_channel *c; + int i; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) + return 0; + + for (i = 0; i < channels->num; i++) { + c = channels->c[i]; + if (enable) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + else + __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + } + + return 0; +} + +static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; + + if (enable && !mlx5e_tx_mpwqe_supported(mdev)) + return -EOPNOTSUPP; + + new_params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_params, flag, enable); + + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); +} + +static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +{ + return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_XDP_TX_MPWQE, enable); +} + +static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable) +{ + return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_SKB_TX_MPWQE, enable); +} + +static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; + int err; + + if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) || + !MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) + return -EOPNOTSUPP; + + /* Don't allow changing the PTP state if HTB offload is active, because + * the numeration of the QoS SQs will change, while per-queue qdiscs are + * attached. + */ + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n", + __func__); + return -EINVAL; + } + + new_params = priv->channels.params; + /* Don't allow enabling TX-port-TS if MQPRIO mode channel offload is + * active, since it defines explicitly which TC accepts the packet. + * This conflicts with TX-port-TS hijacking the PTP traffic to a specific + * HW TX-queue. + */ + if (enable && new_params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + netdev_err(priv->netdev, + "%s: MQPRIO mode channel offload is active, cannot set the TX-port-TS\n", + __func__); + return -EINVAL; + } + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_PORT_TS, enable); + /* No need to verify SQ stop room as + * ptpsq.txqsq.stop_room <= generic_sq->stop_room, and both + * has the same log_sq_size. + */ + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); + if (!err) + priv->tx_ptp_opened = true; + + return err; +} + +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { + { "rx_cqe_moder", set_pflag_rx_cqe_based_moder }, + { "tx_cqe_moder", set_pflag_tx_cqe_based_moder }, + { "rx_cqe_compress", set_pflag_rx_cqe_compress }, + { "rx_striding_rq", set_pflag_rx_striding_rq }, + { "rx_no_csum_complete", set_pflag_rx_no_csum_complete }, + { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe }, + { "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe }, + { "tx_port_ts", set_pflag_tx_port_ts }, +}; + +static int mlx5e_handle_pflag(struct net_device *netdev, + u32 wanted_flags, + enum mlx5e_priv_flag flag) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + bool enable = !!(wanted_flags & BIT(flag)); + u32 changes = wanted_flags ^ priv->channels.params.pflags; + int err; + + if (!(changes & BIT(flag))) + return 0; + + err = mlx5e_priv_flags[flag].handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s private flag '%s' failed err %d\n", + enable ? "Enable" : "Disable", mlx5e_priv_flags[flag].name, err); + return err; + } + + MLX5E_SET_PFLAG(&priv->channels.params, flag, enable); + return 0; +} + +static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + enum mlx5e_priv_flag pflag; + int err; + + mutex_lock(&priv->state_lock); + + for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) { + err = mlx5e_handle_pflag(netdev, pflags, pflag); + if (err) + break; + } + + mutex_unlock(&priv->state_lock); + + /* Need to fix some features.. */ + netdev_update_features(netdev); + + return err; +} + +static u32 mlx5e_get_priv_flags(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return priv->channels.params.pflags; +} + +static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. + */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); +} + +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_rxnfc(priv, cmd); +} + +static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode) +{ + struct mlx5_ifc_pddr_troubleshooting_page_bits *pddr_troubleshooting_page; + u32 in[MLX5_ST_SZ_DW(pddr_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(pddr_reg)]; + int err; + + MLX5_SET(pddr_reg, in, local_port, 1); + MLX5_SET(pddr_reg, in, page_select, + MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE); + + pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, in, page_data); + MLX5_SET(pddr_troubleshooting_page, pddr_troubleshooting_page, + group_opcode, MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR); + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PDDR, 0, 0); + if (err) + return err; + + pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, out, page_data); + *status_opcode = MLX5_GET(pddr_troubleshooting_page, pddr_troubleshooting_page, + status_opcode); + return 0; +} + +struct mlx5e_ethtool_link_ext_state_opcode_mapping { + u32 status_opcode; + enum ethtool_link_ext_state link_ext_state; + u8 link_ext_substate; +}; + +static const struct mlx5e_ethtool_link_ext_state_opcode_mapping +mlx5e_link_ext_state_opcode_map[] = { + /* States relating to the autonegotiation or issues therein */ + {2, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED}, + {3, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED}, + {4, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED}, + {36, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE}, + {38, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE}, + {39, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD}, + + /* Failure during link training */ + {5, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED}, + {6, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT}, + {7, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY}, + {8, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, 0}, + {14, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT}, + + /* Logical mismatch in physical coding sublayer or forward error correction sublayer */ + {9, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK}, + {10, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK}, + {11, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS}, + {12, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED}, + {13, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED}, + + /* Signal integrity issues */ + {15, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, 0}, + {17, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS}, + {42, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE}, + + /* No cable connected */ + {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0}, + + /* Failure is related to cable, e.g., unsupported cable */ + {16, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {20, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {29, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1029, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1031, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, 0}, + + /* Failure is related to EEPROM, e.g., failure during reading or parsing the data */ + {1027, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0}, + + /* Failure during calibration algorithm */ + {23, ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, 0}, + + /* The hardware is not able to provide the power required from cable or module */ + {1032, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, 0}, + + /* The module is overheated */ + {1030, ETHTOOL_LINK_EXT_STATE_OVERHEAT, 0}, +}; + +static void +mlx5e_set_link_ext_state(struct mlx5e_ethtool_link_ext_state_opcode_mapping + link_ext_state_mapping, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + switch (link_ext_state_mapping.link_ext_state) { + case ETHTOOL_LINK_EXT_STATE_AUTONEG: + link_ext_state_info->autoneg = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE: + link_ext_state_info->link_training = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH: + link_ext_state_info->link_logical_mismatch = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY: + link_ext_state_info->bad_signal_integrity = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE: + link_ext_state_info->cable_issue = + link_ext_state_mapping.link_ext_substate; + break; + default: + break; + } + + link_ext_state_info->link_ext_state = link_ext_state_mapping.link_ext_state; +} + +static int +mlx5e_get_link_ext_state(struct net_device *dev, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + struct mlx5e_ethtool_link_ext_state_opcode_mapping link_ext_state_mapping; + struct mlx5e_priv *priv = netdev_priv(dev); + u32 status_opcode = 0; + int i; + + /* Exit without data if the interface state is OK, since no extended data is + * available in such case + */ + if (netif_carrier_ok(dev)) + return -ENODATA; + + if (query_port_status_opcode(priv->mdev, &status_opcode) || + !status_opcode) + return -ENODATA; + + for (i = 0; i < ARRAY_SIZE(mlx5e_link_ext_state_opcode_map); i++) { + link_ext_state_mapping = mlx5e_link_ext_state_opcode_map[i]; + if (link_ext_state_mapping.status_opcode == status_opcode) { + mlx5e_set_link_ext_state(link_ext_state_mapping, + link_ext_state_info); + return 0; + } + } + + return -ENODATA; +} + +static void mlx5e_get_eth_phy_stats(struct net_device *netdev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_phy_get(priv, phy_stats); +} + +static void mlx5e_get_eth_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_mac_get(priv, mac_stats); +} + +static void mlx5e_get_eth_ctrl_stats(struct net_device *netdev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_ctrl_get(priv, ctrl_stats); +} + +static void mlx5e_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_rmon_get(priv, rmon_stats, ranges); +} + +const struct ethtool_ops mlx5e_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE | + ETHTOOL_COALESCE_USE_CQE, + .get_drvinfo = mlx5e_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_link_ext_state = mlx5e_get_link_ext_state, + .get_strings = mlx5e_get_strings, + .get_sset_count = mlx5e_get_sset_count, + .get_ethtool_stats = mlx5e_get_ethtool_stats, + .get_ringparam = mlx5e_get_ringparam, + .set_ringparam = mlx5e_set_ringparam, + .get_channels = mlx5e_get_channels, + .set_channels = mlx5e_set_channels, + .get_coalesce = mlx5e_get_coalesce, + .set_coalesce = mlx5e_set_coalesce, + .get_link_ksettings = mlx5e_get_link_ksettings, + .set_link_ksettings = mlx5e_set_link_ksettings, + .get_rxfh_key_size = mlx5e_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, + .get_rxfh = mlx5e_get_rxfh, + .set_rxfh = mlx5e_set_rxfh, + .get_rxfh_context = mlx5e_get_rxfh_context, + .set_rxfh_context = mlx5e_set_rxfh_context, + .get_rxnfc = mlx5e_get_rxnfc, + .set_rxnfc = mlx5e_set_rxnfc, + .get_tunable = mlx5e_get_tunable, + .set_tunable = mlx5e_set_tunable, + .get_pause_stats = mlx5e_get_pause_stats, + .get_pauseparam = mlx5e_get_pauseparam, + .set_pauseparam = mlx5e_set_pauseparam, + .get_ts_info = mlx5e_get_ts_info, + .set_phys_id = mlx5e_set_phys_id, + .get_wol = mlx5e_get_wol, + .set_wol = mlx5e_set_wol, + .get_module_info = mlx5e_get_module_info, + .get_module_eeprom = mlx5e_get_module_eeprom, + .get_module_eeprom_by_page = mlx5e_get_module_eeprom_by_page, + .flash_device = mlx5e_flash_device, + .get_priv_flags = mlx5e_get_priv_flags, + .set_priv_flags = mlx5e_set_priv_flags, + .self_test = mlx5e_self_test, + .get_fec_stats = mlx5e_get_fec_stats, + .get_fecparam = mlx5e_get_fecparam, + .set_fecparam = mlx5e_set_fecparam, + .get_eth_phy_stats = mlx5e_get_eth_phy_stats, + .get_eth_mac_stats = mlx5e_get_eth_mac_stats, + .get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats, + .get_rmon_stats = mlx5e_get_rmon_stats, + .get_link_ext_stats = mlx5e_get_link_ext_stats +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c new file mode 100644 index 0000000000..934b0d5ce1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -0,0 +1,1600 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "en_tc.h" +#include "lib/mpfs.h" +#include "en/ptp.h" +#include "en/fs_ethtool.h" + +struct mlx5e_flow_steering { + struct work_struct set_rx_mode_work; + bool state_destroy; + bool vlan_strip_disable; + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_namespace *egress_ns; +#ifdef CONFIG_MLX5_EN_RXNFC + struct mlx5e_ethtool_steering *ethtool; +#endif + struct mlx5e_tc_table *tc; + struct mlx5e_promisc_table promisc; + struct mlx5e_vlan_table *vlan; + struct mlx5e_l2_table l2; + struct mlx5_ttc_table *ttc; + struct mlx5_ttc_table *inner_ttc; +#ifdef CONFIG_MLX5_EN_ARFS + struct mlx5e_arfs_tables *arfs; +#endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5e_accel_fs_tcp *accel_tcp; +#endif + struct mlx5e_fs_udp *udp; + struct mlx5e_fs_any *any; + struct mlx5e_ptp_fs *ptp_fs; + struct dentry *dfs_root; +}; + +static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_rule *ai, int type); +static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_rule *ai); + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_l2_hash_node { + struct hlist_node hlist; + u8 action; + struct mlx5e_l2_rule ai; + bool mpfs; +}; + +static inline int mlx5e_hash_l2(const u8 *addr) +{ + return addr[5]; +} + +struct dentry *mlx5e_fs_get_debugfs_root(struct mlx5e_flow_steering *fs) +{ + return fs->dfs_root; +} + +static void mlx5e_add_l2_to_hash(struct hlist_head *hash, const u8 *addr) +{ + struct mlx5e_l2_hash_node *hn; + int ix = mlx5e_hash_l2(addr); + int found = 0; + + hlist_for_each_entry(hn, &hash[ix], hlist) + if (ether_addr_equal_64bits(hn->ai.addr, addr)) { + found = 1; + break; + } + + if (found) { + hn->action = MLX5E_ACTION_NONE; + return; + } + + hn = kzalloc(sizeof(*hn), GFP_ATOMIC); + if (!hn) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + hlist_add_head(&hn->hlist, &hash[ix]); +} + +static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn) +{ + hlist_del(&hn->hlist); + kfree(hn); +} + +struct mlx5e_vlan_table { + struct mlx5e_flow_table ft; + DECLARE_BITMAP(active_cvlans, VLAN_N_VID); + DECLARE_BITMAP(active_svlans, VLAN_N_VID); + struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *untagged_rule; + struct mlx5_flow_handle *any_cvlan_rule; + struct mlx5_flow_handle *any_svlan_rule; + struct mlx5_flow_handle *trap_rule; + bool cvlan_filter_disabled; +}; + +unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan) +{ + return vlan->active_svlans; +} + +struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan) +{ + return vlan->ft.t; +} + +static int mlx5e_vport_context_update_vlans(struct mlx5e_flow_steering *fs) +{ + int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(fs->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + fs_warn(fs, "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kvcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, fs->vlan->active_cvlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(fs->mdev, vlans, list_size); + if (err) + fs_err(fs, "Failed to modify vport vlans list err(%d)\n", + err); + + kvfree(vlans); + return err; +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, +}; + +static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs, + enum mlx5e_vlan_rule_type rule_type, + u16 vid, struct mlx5_flow_spec *spec) +{ + struct mlx5_flow_table *ft = fs->vlan->ft.t; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); + int err = 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = fs->l2.ft.t; + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ + rule_p = &fs->vlan->untagged_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + rule_p = &fs->vlan->any_cvlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + rule_p = &fs->vlan->any_svlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + rule_p = &fs->vlan->active_svlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ + rule_p = &fs->vlan->active_cvlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; + } + + if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type)) + return 0; + + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + fs_err(fs, "add rule failed\n"); + } + + return err; +} + +static int mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID) + mlx5e_vport_context_update_vlans(fs); + + err = __mlx5e_add_vlan_rule(fs, rule_type, vid, spec); + + kvfree(spec); + + return err; +} + +static void mlx5e_fs_del_vlan_rule(struct mlx5e_flow_steering *fs, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + if (fs->vlan->untagged_rule) { + mlx5_del_flow_rules(fs->vlan->untagged_rule); + fs->vlan->untagged_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + if (fs->vlan->any_cvlan_rule) { + mlx5_del_flow_rules(fs->vlan->any_cvlan_rule); + fs->vlan->any_cvlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + if (fs->vlan->any_svlan_rule) { + mlx5_del_flow_rules(fs->vlan->any_svlan_rule); + fs->vlan->any_svlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + if (fs->vlan->active_svlans_rule[vid]) { + mlx5_del_flow_rules(fs->vlan->active_svlans_rule[vid]); + fs->vlan->active_svlans_rule[vid] = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: + if (fs->vlan->active_cvlans_rule[vid]) { + mlx5_del_flow_rules(fs->vlan->active_cvlans_rule[vid]); + fs->vlan->active_cvlans_rule[vid] = NULL; + } + mlx5e_vport_context_update_vlans(fs); + break; + } +} + +static void mlx5e_fs_del_any_vid_rules(struct mlx5e_flow_steering *fs) +{ + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +static int mlx5e_fs_add_any_vid_rules(struct mlx5e_flow_steering *fs) +{ + int err; + + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + if (err) + return err; + + return mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +static struct mlx5_flow_handle * +mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num) +{ + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + spec->flow_context.flags |= FLOW_CONTEXT_HAS_TAG; + spec->flow_context.flow_tag = trap_id; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + return rule; +} + +int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num) +{ + struct mlx5_flow_table *ft = fs->vlan->ft.t; + struct mlx5_flow_handle *rule; + int err; + + rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs->vlan->trap_rule = NULL; + fs_err(fs, "add VLAN trap rule failed, err %d\n", err); + return err; + } + fs->vlan->trap_rule = rule; + return 0; +} + +void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs) +{ + if (fs->vlan->trap_rule) { + mlx5_del_flow_rules(fs->vlan->trap_rule); + fs->vlan->trap_rule = NULL; + } +} + +int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num) +{ + struct mlx5_flow_table *ft = fs->l2.ft.t; + struct mlx5_flow_handle *rule; + int err; + + rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + fs->l2.trap_rule = NULL; + fs_err(fs, "add MAC trap rule failed, err %d\n", err); + return err; + } + fs->l2.trap_rule = rule; + return 0; +} + +void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs) +{ + if (fs->l2.trap_rule) { + mlx5_del_flow_rules(fs->l2.trap_rule); + fs->l2.trap_rule = NULL; + } +} + +void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) +{ + if (!fs->vlan->cvlan_filter_disabled) + return; + + fs->vlan->cvlan_filter_disabled = false; + if (promisc) + return; + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); +} + +void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) +{ + if (!fs->vlan || fs->vlan->cvlan_filter_disabled) + return; + + fs->vlan->cvlan_filter_disabled = true; + if (promisc) + return; + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); +} + +static int mlx5e_vlan_rx_add_cvid(struct mlx5e_flow_steering *fs, u16 vid) +{ + int err; + + set_bit(vid, fs->vlan->active_cvlans); + + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + if (err) + clear_bit(vid, fs->vlan->active_cvlans); + + return err; +} + +static int mlx5e_vlan_rx_add_svid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, u16 vid) +{ + int err; + + set_bit(vid, fs->vlan->active_svlans); + + err = mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + if (err) { + clear_bit(vid, fs->vlan->active_svlans); + return err; + } + + /* Need to fix some features.. */ + netdev_update_features(netdev); + return err; +} + +int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid) +{ + + if (!fs->vlan) { + fs_err(fs, "Vlan doesn't exist\n"); + return -EINVAL; + } + + if (be16_to_cpu(proto) == ETH_P_8021Q) + return mlx5e_vlan_rx_add_cvid(fs, vid); + else if (be16_to_cpu(proto) == ETH_P_8021AD) + return mlx5e_vlan_rx_add_svid(fs, netdev, vid); + + return -EOPNOTSUPP; +} + +int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + __be16 proto, u16 vid) +{ + if (!fs->vlan) { + fs_err(fs, "Vlan doesn't exist\n"); + return -EINVAL; + } + + if (be16_to_cpu(proto) == ETH_P_8021Q) { + clear_bit(vid, fs->vlan->active_cvlans); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + } else if (be16_to_cpu(proto) == ETH_P_8021AD) { + clear_bit(vid, fs->vlan->active_svlans); + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_update_features(netdev); + } + + return 0; +} + +static void mlx5e_fs_add_vlan_rules(struct mlx5e_flow_steering *fs) +{ + int i; + + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); + } + + for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID) + mlx5e_add_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + if (fs->vlan->cvlan_filter_disabled) + mlx5e_fs_add_any_vid_rules(fs); +} + +static void mlx5e_del_vlan_rules(struct mlx5e_flow_steering *fs) +{ + int i; + + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, fs->vlan->active_cvlans, VLAN_N_VID) { + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); + } + + for_each_set_bit(i, fs->vlan->active_svlans, VLAN_N_VID) + mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + WARN_ON_ONCE(fs->state_destroy); + + mlx5e_remove_vlan_trap(fs); + + /* must be called after DESTROY bit is set and + * set_rx_mode is called and flushed + */ + if (fs->vlan->cvlan_filter_disabled) + mlx5e_fs_del_any_vid_rules(fs); +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +static void mlx5e_execute_l2_action(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_hash_node *hn) +{ + u8 action = hn->action; + u8 mac_addr[ETH_ALEN]; + int l2_err = 0; + + ether_addr_copy(mac_addr, hn->ai.addr); + + switch (action) { + case MLX5E_ACTION_ADD: + mlx5e_add_l2_flow_rule(fs, &hn->ai, MLX5E_FULLMATCH); + if (!is_multicast_ether_addr(mac_addr)) { + l2_err = mlx5_mpfs_add_mac(fs->mdev, mac_addr); + hn->mpfs = !l2_err; + } + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + if (!is_multicast_ether_addr(mac_addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(fs->mdev, mac_addr); + mlx5e_del_l2_flow_rule(fs, &hn->ai); + mlx5e_del_l2_from_hash(hn); + break; + } + + if (l2_err) + fs_warn(fs, "MPFS, failed to %s mac %pM, err(%d)\n", + action == MLX5E_ACTION_ADD ? "add" : "del", + mac_addr, l2_err); +} + +static void mlx5e_sync_netdev_addr(struct mlx5e_flow_steering *fs, + struct net_device *netdev) +{ + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + + mlx5e_add_l2_to_hash(fs->l2.netdev_uc, netdev->dev_addr); + netdev_for_each_uc_addr(ha, netdev) + mlx5e_add_l2_to_hash(fs->l2.netdev_uc, ha->addr); + + netdev_for_each_mc_addr(ha, netdev) + mlx5e_add_l2_to_hash(fs->l2.netdev_mc, ha->addr); + + netif_addr_unlock_bh(netdev); +} + +static void mlx5e_fill_addr_array(struct mlx5e_flow_steering *fs, int list_type, + struct net_device *ndev, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_l2_hash_node *hn; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], ndev->dev_addr); + else if (fs->l2.broadcast_enabled) + ether_addr_copy(addr_array[i++], ndev->broadcast); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(ndev->dev_addr, hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_flow_steering *fs, + struct net_device *netdev, + int list_type) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_l2_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (fs->l2.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(fs->mdev, log_max_current_mc_list); + + addr_list = is_uc ? fs->l2.netdev_uc : fs->l2.netdev_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + fs_warn(fs, "mdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(fs, list_type, netdev, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(fs->mdev, list_type, addr_array, size); +out: + if (err) + fs_err(fs, "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_flow_steering *fs, + struct net_device *netdev) +{ + struct mlx5e_l2_table *ea = &fs->l2; + + mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(fs, netdev, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(fs->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + +static void mlx5e_apply_netdev_addr(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_l2_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i) + mlx5e_execute_l2_action(fs, hn); + + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i) + mlx5e_execute_l2_action(fs, hn); +} + +static void mlx5e_handle_netdev_addr(struct mlx5e_flow_steering *fs, + struct net_device *netdev) +{ + struct mlx5e_l2_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, fs->l2.netdev_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (fs->state_destroy) + mlx5e_sync_netdev_addr(fs, netdev); + + mlx5e_apply_netdev_addr(fs); +} + +#define MLX5E_PROMISC_GROUP0_SIZE BIT(0) +#define MLX5E_PROMISC_TABLE_SIZE MLX5E_PROMISC_GROUP0_SIZE + +static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs) +{ + struct mlx5_flow_table *ft = fs->promisc.ft.t; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = mlx5_get_ttc_flow_table(fs->ttc); + + rule_p = &fs->promisc.rule; + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + fs_err(fs, "add promiscuous rule failed\n"); + } + kvfree(spec); + return err; +} + +static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_flow_table *ft = &fs->promisc.ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_PROMISC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + fs_err(fs, "fail to create promisc table err=%d\n", err); + return err; + } + + err = mlx5e_add_promisc_rule(fs); + if (err) + goto err_destroy_promisc_table; + + return 0; + +err_destroy_promisc_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_del_promisc_rule(struct mlx5e_flow_steering *fs) +{ + if (WARN(!fs->promisc.rule, "Trying to remove non-existing promiscuous rule")) + return; + mlx5_del_flow_rules(fs->promisc.rule); + fs->promisc.rule = NULL; +} + +static void mlx5e_destroy_promisc_table(struct mlx5e_flow_steering *fs) +{ + if (!fs->promisc.ft.t) + return; + mlx5e_del_promisc_rule(fs); + mlx5_destroy_flow_table(fs->promisc.ft.t); + fs->promisc.ft.t = NULL; +} + +void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, + struct net_device *netdev) +{ + struct mlx5e_l2_table *ea = &fs->l2; + + bool rx_mode_enable = fs->state_destroy; + bool promisc_enabled = rx_mode_enable && (netdev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (netdev->flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + int err; + + if (enable_promisc) { + err = mlx5e_create_promisc_table(fs); + if (err) + enable_promisc = false; + if (!fs->vlan_strip_disable && !err) + fs_warn_once(fs, + "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); + } + if (enable_allmulti) + mlx5e_add_l2_flow_rule(fs, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + mlx5e_add_l2_flow_rule(fs, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_netdev_addr(fs, netdev); + + if (disable_broadcast) + mlx5e_del_l2_flow_rule(fs, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_l2_flow_rule(fs, &ea->allmulti); + if (disable_promisc) + mlx5e_destroy_promisc_table(fs); + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(fs, netdev); +} + +static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) +{ + int i; + + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} + +void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev) +{ + ether_addr_copy(fs->l2.broadcast.addr, netdev->broadcast); +} + +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) +{ + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(fs->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_rx_res_get_tirn_direct(rx_res, 0) : + mlx5e_rx_res_get_tirn_rss_inner(rx_res, + tt); + } +} + +void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + struct ttc_params *ttc_params, bool tunnel) + +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(fs->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + ft_attr->level = MLX5E_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_NIC_PRIO; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_rx_res_get_tirn_direct(rx_res, 0) : + mlx5e_rx_res_get_tirn_rss(rx_res, tt); + } + + ttc_params->inner_ttc = tunnel; + if (!tunnel || !mlx5_tunnel_inner_ft_supported(fs->mdev)) + return; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + ttc_params->tunnel_dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->tunnel_dests[tt].ft = + mlx5_get_ttc_flow_table(fs->inner_ttc); + } +} + +static void mlx5e_del_l2_flow_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_rule *ai) +{ + if (!IS_ERR_OR_NULL(ai->rule)) { + mlx5_del_flow_rules(ai->rule); + ai->rule = NULL; + } +} + +static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_l2_rule *ai, int type) +{ + struct mlx5_flow_table *ft = fs->l2.ft.t; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + int err = 0; + u8 *mc_dmac; + u8 *mv_dmac; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dmac_47_16); + mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dmac_47_16); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = mlx5_get_ttc_flow_table(fs->ttc); + + switch (type) { + case MLX5E_FULLMATCH: + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; + break; + } + + ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(ai->rule)) { + fs_err(fs, "add l2 rule(mac:%pM) failed\n", mv_dmac); + err = PTR_ERR(ai->rule); + ai->rule = NULL; + } + + kvfree(spec); + + return err; +} + +#define MLX5E_NUM_L2_GROUPS 3 +#define MLX5E_L2_GROUP1_SIZE BIT(15) +#define MLX5E_L2_GROUP2_SIZE BIT(0) +#define MLX5E_L2_GROUP_TRAP_SIZE BIT(0) /* must be last */ +#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\ + MLX5E_L2_GROUP2_SIZE +\ + MLX5E_L2_GROUP_TRAP_SIZE) +static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &l2_table->ft; + int ix = 0; + u8 *mc_dmac; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + /* Flow Group for full match */ + eth_broadcast_addr(mc_dmac); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + /* Flow Group for allmulti */ + eth_zero_addr(mc_dmac); + mc_dmac[0] = 0x01; + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP_TRAP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + kvfree(in); + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + kvfree(in); + kfree(ft->g); + + return err; +} + +static void mlx5e_destroy_l2_table(struct mlx5e_flow_steering *fs) +{ + mlx5e_destroy_flow_table(&fs->l2.ft); +} + +static int mlx5e_create_l2_table(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_l2_table *l2_table = &fs->l2; + struct mlx5e_flow_table *ft = &l2_table->ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_L2_TABLE_SIZE; + ft_attr.level = MLX5E_L2_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(fs->ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_l2_table_groups(l2_table); + if (err) + goto err_destroy_flow_table; + + return 0; + +err_destroy_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +#define MLX5E_NUM_VLAN_GROUPS 5 +#define MLX5E_VLAN_GROUP0_SIZE BIT(12) +#define MLX5E_VLAN_GROUP1_SIZE BIT(12) +#define MLX5E_VLAN_GROUP2_SIZE BIT(1) +#define MLX5E_VLAN_GROUP3_SIZE BIT(0) +#define MLX5E_VLAN_GROUP_TRAP_SIZE BIT(0) /* must be last */ +#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ + MLX5E_VLAN_GROUP1_SIZE +\ + MLX5E_VLAN_GROUP2_SIZE +\ + MLX5E_VLAN_GROUP3_SIZE +\ + MLX5E_VLAN_GROUP_TRAP_SIZE) + +static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + int err; + int ix = 0; + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP_TRAP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + + return 0; + +err_destroy_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return err; +} + +static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = __mlx5e_create_vlan_table_groups(ft, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_fs_create_vlan_table(struct mlx5e_flow_steering *fs) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft; + int err; + + ft = &fs->vlan->ft; + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE; + ft_attr.level = MLX5E_VLAN_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(fs->ns, &ft_attr); + if (IS_ERR(ft->t)) + return PTR_ERR(ft->t); + + ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_vlan_table; + } + + err = mlx5e_create_vlan_table_groups(ft); + if (err) + goto err_free_g; + + mlx5e_fs_add_vlan_rules(fs); + + return 0; + +err_free_g: + kfree(ft->g); +err_destroy_vlan_table: + mlx5_destroy_flow_table(ft->t); + + return err; +} + +static void mlx5e_destroy_vlan_table(struct mlx5e_flow_steering *fs) +{ + mlx5e_del_vlan_rules(fs); + mlx5e_destroy_flow_table(&fs->vlan->ft); +} + +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_flow_steering *fs) +{ + if (!mlx5_tunnel_inner_ft_supported(fs->mdev)) + return; + mlx5_destroy_ttc_table(fs->inner_ttc); +} + +void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs) +{ + mlx5_destroy_ttc_table(fs->ttc); +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res) +{ + struct ttc_params ttc_params = {}; + + if (!mlx5_tunnel_inner_ft_supported(fs->mdev)) + return 0; + + mlx5e_set_inner_ttc_params(fs, rx_res, &ttc_params); + fs->inner_ttc = mlx5_create_inner_ttc_table(fs->mdev, + &ttc_params); + if (IS_ERR(fs->inner_ttc)) + return PTR_ERR(fs->inner_ttc); + return 0; +} + +int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res) +{ + struct ttc_params ttc_params = {}; + + mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true); + fs->ttc = mlx5_create_ttc_table(fs->mdev, &ttc_params); + if (IS_ERR(fs->ttc)) + return PTR_ERR(fs->ttc); + return 0; +} + +int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs, + struct mlx5e_rx_res *rx_res, + const struct mlx5e_profile *profile, + struct net_device *netdev) +{ + struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(fs->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + int err; + + if (!ns) + return -EOPNOTSUPP; + + mlx5e_fs_set_ns(fs, ns, false); + err = mlx5e_arfs_create_tables(fs, rx_res, + !!(netdev->hw_features & NETIF_F_NTUPLE)); + if (err) { + fs_err(fs, "Failed to create arfs tables, err=%d\n", err); + netdev->hw_features &= ~NETIF_F_NTUPLE; + } + + err = mlx5e_create_inner_ttc_table(fs, rx_res); + if (err) { + fs_err(fs, "Failed to create inner ttc table, err=%d\n", err); + goto err_destroy_arfs_tables; + } + + err = mlx5e_create_ttc_table(fs, rx_res); + if (err) { + fs_err(fs, "Failed to create ttc table, err=%d\n", err); + goto err_destroy_inner_ttc_table; + } + + err = mlx5e_create_l2_table(fs); + if (err) { + fs_err(fs, "Failed to create l2 table, err=%d\n", err); + goto err_destroy_ttc_table; + } + + err = mlx5e_fs_create_vlan_table(fs); + if (err) { + fs_err(fs, "Failed to create vlan table, err=%d\n", err); + goto err_destroy_l2_table; + } + + err = mlx5e_ptp_alloc_rx_fs(fs, profile); + if (err) + goto err_destory_vlan_table; + + mlx5e_ethtool_init_steering(fs); + + return 0; + +err_destory_vlan_table: + mlx5e_destroy_vlan_table(fs); +err_destroy_l2_table: + mlx5e_destroy_l2_table(fs); +err_destroy_ttc_table: + mlx5e_destroy_ttc_table(fs); +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(fs); +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(fs, !!(netdev->hw_features & NETIF_F_NTUPLE)); + + return err; +} + +void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple, + const struct mlx5e_profile *profile) +{ + mlx5e_ptp_free_rx_fs(fs, profile); + mlx5e_destroy_vlan_table(fs); + mlx5e_destroy_l2_table(fs); + mlx5e_destroy_ttc_table(fs); + mlx5e_destroy_inner_ttc_table(fs); + mlx5e_arfs_destroy_tables(fs, ntuple); + mlx5e_ethtool_cleanup_steering(fs); +} + +static int mlx5e_fs_vlan_alloc(struct mlx5e_flow_steering *fs) +{ + fs->vlan = kvzalloc(sizeof(*fs->vlan), GFP_KERNEL); + if (!fs->vlan) + return -ENOMEM; + return 0; +} + +static void mlx5e_fs_vlan_free(struct mlx5e_flow_steering *fs) +{ + kvfree(fs->vlan); +} + +struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs) +{ + return fs->vlan; +} + +static int mlx5e_fs_tc_alloc(struct mlx5e_flow_steering *fs) +{ + fs->tc = mlx5e_tc_table_alloc(); + if (IS_ERR(fs->tc)) + return -ENOMEM; + return 0; +} + +static void mlx5e_fs_tc_free(struct mlx5e_flow_steering *fs) +{ + mlx5e_tc_table_free(fs->tc); +} + +struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs) +{ + return fs->tc; +} + +#ifdef CONFIG_MLX5_EN_RXNFC +static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs) +{ + return mlx5e_ethtool_alloc(&fs->ethtool); +} + +static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) +{ + mlx5e_ethtool_free(fs->ethtool); +} + +struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs) +{ + return fs->ethtool; +} +#else +static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs) +{ return 0; } +static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { } +#endif + +static void mlx5e_fs_debugfs_init(struct mlx5e_flow_steering *fs, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + fs->dfs_root = debugfs_create_dir("fs", dfs_root); +} + +struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, + struct mlx5_core_dev *mdev, + bool state_destroy, + struct dentry *dfs_root) +{ + struct mlx5e_flow_steering *fs; + int err; + + fs = kvzalloc(sizeof(*fs), GFP_KERNEL); + if (!fs) + goto err; + + fs->mdev = mdev; + fs->state_destroy = state_destroy; + if (mlx5e_profile_feature_cap(profile, FS_VLAN)) { + err = mlx5e_fs_vlan_alloc(fs); + if (err) + goto err_free_fs; + } + + if (mlx5e_profile_feature_cap(profile, FS_TC)) { + err = mlx5e_fs_tc_alloc(fs); + if (err) + goto err_free_vlan; + } + + err = mlx5e_fs_ethtool_alloc(fs); + if (err) + goto err_free_tc; + + mlx5e_fs_debugfs_init(fs, dfs_root); + + return fs; +err_free_tc: + mlx5e_fs_tc_free(fs); +err_free_vlan: + mlx5e_fs_vlan_free(fs); +err_free_fs: + kvfree(fs); +err: + return NULL; +} + +void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs) +{ + if (!fs) + return; + debugfs_remove_recursive(fs->dfs_root); + mlx5e_fs_ethtool_free(fs); + mlx5e_fs_tc_free(fs); + mlx5e_fs_vlan_free(fs); + kvfree(fs); +} + +struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs) +{ + return &fs->l2; +} + +struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress) +{ + return egress ? fs->egress_ns : fs->ns; +} + +void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress) +{ + if (!egress) + fs->ns = ns; + else + fs->egress_ns = ns; +} + +struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner) +{ + return inner ? fs->inner_ttc : fs->ttc; +} + +void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner) +{ + if (!inner) + fs->ttc = ttc; + else + fs->inner_ttc = ttc; +} + +#ifdef CONFIG_MLX5_EN_ARFS +struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs) +{ + return fs->arfs; +} + +void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs) +{ + fs->arfs = arfs; +} +#endif + +struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs) +{ + return fs->ptp_fs; +} + +void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs) +{ + fs->ptp_fs = ptp_fs; +} + +struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs) +{ + return fs->any; +} + +void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any) +{ + fs->any = any; +} + +#ifdef CONFIG_MLX5_EN_TLS +struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs) +{ + return fs->accel_tcp; +} + +void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp) +{ + fs->accel_tcp = accel_tcp; +} +#endif + +void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy) +{ + fs->state_destroy = state_destroy; +} + +void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, + bool vlan_strip_disable) +{ + fs->vlan_strip_disable = vlan_strip_disable; +} + +struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs) +{ + return fs->udp; +} + +void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp) +{ + fs->udp = udp; +} + +struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs) +{ + return fs->mdev; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c new file mode 100644 index 0000000000..3eccdadc03 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -0,0 +1,1024 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "en.h" +#include "en/params.h" +#include "en/xsk/pool.h" +#include "en/fs_ethtool.h" + +struct mlx5e_ethtool_table { + struct mlx5_flow_table *ft; + int num_rules; +}; + +#define ETHTOOL_NUM_L3_L4_FTS 7 +#define ETHTOOL_NUM_L2_FTS 4 + +struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; + struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; + struct list_head rules; + int tot_num_rules; +}; + +static int flow_type_to_traffic_type(u32 flow_type); + +static u32 flow_type_mask(u32 flow_type) +{ + return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); +} + +struct mlx5e_ethtool_rule { + struct list_head list; + struct ethtool_rx_flow_spec flow_spec; + struct mlx5_flow_handle *rule; + struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_rss *rss; +}; + +static void put_flow_table(struct mlx5e_ethtool_table *eth_ft) +{ + if (!--eth_ft->num_rules) { + mlx5_destroy_flow_table(eth_ft->ft); + eth_ft->ft = NULL; + } +} + +#define MLX5E_ETHTOOL_L3_L4_PRIO 0 +#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS) +#define MLX5E_ETHTOOL_NUM_ENTRIES 64000 +#define MLX5E_ETHTOOL_NUM_GROUPS 10 +static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs, + int num_tuples) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_ethtool_table *eth_ft; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int max_tuples; + int table_size; + int prio; + + switch (flow_type_mask(fs->flow_type)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case IP_USER_FLOW: + case IPV6_USER_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = ðtool->l3_l4_ft[prio]; + break; + case ETHER_FLOW: + max_tuples = ETHTOOL_NUM_L2_FTS; + prio = max_tuples - num_tuples; + eth_ft = ðtool->l2_ft[prio]; + prio += MLX5E_ETHTOOL_L2_PRIO; + break; + default: + return ERR_PTR(-EINVAL); + } + + eth_ft->num_rules++; + if (eth_ft->ft) + return eth_ft; + + ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_ETHTOOL); + if (!ns) + return ERR_PTR(-EOPNOTSUPP); + + table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.log_max_ft_size)), + MLX5E_ETHTOOL_NUM_ENTRIES); + + ft_attr.prio = prio; + ft_attr.max_fte = table_size; + ft_attr.autogroup.max_num_groups = MLX5E_ETHTOOL_NUM_GROUPS; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return (void *)ft; + + eth_ft->ft = ft; + return eth_ft; +} + +static void mask_spec(u8 *mask, u8 *val, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++, mask++, val++) + *((u8 *)val) = *((u8 *)mask) & *((u8 *)val); +} + +#define MLX5E_FTE_SET(header_p, fld, v) \ + MLX5_SET(fte_match_set_lyr_2_4, header_p, fld, v) + +#define MLX5E_FTE_ADDR_OF(header_p, fld) \ + MLX5_ADDR_OF(fte_match_set_lyr_2_4, header_p, fld) + +static void +set_ip4(void *headers_c, void *headers_v, __be32 ip4src_m, + __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v) +{ + if (ip4src_m) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_v, sizeof(ip4src_v)); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_m, sizeof(ip4src_m)); + } + if (ip4dst_m) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_v, sizeof(ip4dst_v)); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_m, sizeof(ip4dst_m)); + } + + MLX5E_FTE_SET(headers_c, ethertype, 0xffff); + MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IP); +} + +static void +set_ip6(void *headers_c, void *headers_v, __be32 ip6src_m[4], + __be32 ip6src_v[4], __be32 ip6dst_m[4], __be32 ip6dst_v[4]) +{ + u8 ip6_sz = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); + + if (!ipv6_addr_any((struct in6_addr *)ip6src_m)) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), + ip6src_v, ip6_sz); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), + ip6src_m, ip6_sz); + } + if (!ipv6_addr_any((struct in6_addr *)ip6dst_m)) { + memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + ip6dst_v, ip6_sz); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + ip6dst_m, ip6_sz); + } + + MLX5E_FTE_SET(headers_c, ethertype, 0xffff); + MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IPV6); +} + +static void +set_tcp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, + __be16 pdst_m, __be16 pdst_v) +{ + if (psrc_m) { + MLX5E_FTE_SET(headers_c, tcp_sport, ntohs(psrc_m)); + MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v)); + } + if (pdst_m) { + MLX5E_FTE_SET(headers_c, tcp_dport, ntohs(pdst_m)); + MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v)); + } + + MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff); + MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_TCP); +} + +static void +set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, + __be16 pdst_m, __be16 pdst_v) +{ + if (psrc_m) { + MLX5E_FTE_SET(headers_c, udp_sport, ntohs(psrc_m)); + MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v)); + } + + if (pdst_m) { + MLX5E_FTE_SET(headers_c, udp_dport, ntohs(pdst_m)); + MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v)); + } + + MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff); + MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_UDP); +} + +static void +parse_tcp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec; + struct ethtool_tcpip4_spec *l4_val = &fs->h_u.tcp_ip4_spec; + + set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src, + l4_mask->ip4dst, l4_val->ip4dst); + + set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_udp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.udp_ip4_spec; + struct ethtool_tcpip4_spec *l4_val = &fs->h_u.udp_ip4_spec; + + set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src, + l4_mask->ip4dst, l4_val->ip4dst); + + set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_ip4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec; + struct ethtool_usrip4_spec *l3_val = &fs->h_u.usr_ip4_spec; + + set_ip4(headers_c, headers_v, l3_mask->ip4src, l3_val->ip4src, + l3_mask->ip4dst, l3_val->ip4dst); + + if (l3_mask->proto) { + MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->proto); + MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->proto); + } +} + +static void +parse_ip6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec; + struct ethtool_usrip6_spec *l3_val = &fs->h_u.usr_ip6_spec; + + set_ip6(headers_c, headers_v, l3_mask->ip6src, + l3_val->ip6src, l3_mask->ip6dst, l3_val->ip6dst); + + if (l3_mask->l4_proto) { + MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->l4_proto); + MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->l4_proto); + } +} + +static void +parse_tcp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec; + struct ethtool_tcpip6_spec *l4_val = &fs->h_u.tcp_ip6_spec; + + set_ip6(headers_c, headers_v, l4_mask->ip6src, + l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst); + + set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_udp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.udp_ip6_spec; + struct ethtool_tcpip6_spec *l4_val = &fs->h_u.udp_ip6_spec; + + set_ip6(headers_c, headers_v, l4_mask->ip6src, + l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst); + + set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc, + l4_mask->pdst, l4_val->pdst); +} + +static void +parse_ether(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs) +{ + struct ethhdr *eth_mask = &fs->m_u.ether_spec; + struct ethhdr *eth_val = &fs->h_u.ether_spec; + + mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask)); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, smac_47_16), eth_mask->h_source); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, smac_47_16), eth_val->h_source); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), eth_mask->h_dest); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), eth_val->h_dest); + MLX5E_FTE_SET(headers_c, ethertype, ntohs(eth_mask->h_proto)); + MLX5E_FTE_SET(headers_v, ethertype, ntohs(eth_val->h_proto)); +} + +static void +set_cvlan(void *headers_c, void *headers_v, __be16 vlan_tci) +{ + MLX5E_FTE_SET(headers_c, cvlan_tag, 1); + MLX5E_FTE_SET(headers_v, cvlan_tag, 1); + MLX5E_FTE_SET(headers_c, first_vid, 0xfff); + MLX5E_FTE_SET(headers_v, first_vid, ntohs(vlan_tci)); +} + +static void +set_dmac(void *headers_c, void *headers_v, + unsigned char m_dest[ETH_ALEN], unsigned char v_dest[ETH_ALEN]) +{ + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), m_dest); + ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), v_dest); +} + +static int set_flow_attrs(u32 *match_c, u32 *match_v, + struct ethtool_rx_flow_spec *fs) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + u32 flow_type = flow_type_mask(fs->flow_type); + + switch (flow_type) { + case TCP_V4_FLOW: + parse_tcp4(outer_headers_c, outer_headers_v, fs); + break; + case UDP_V4_FLOW: + parse_udp4(outer_headers_c, outer_headers_v, fs); + break; + case IP_USER_FLOW: + parse_ip4(outer_headers_c, outer_headers_v, fs); + break; + case TCP_V6_FLOW: + parse_tcp6(outer_headers_c, outer_headers_v, fs); + break; + case UDP_V6_FLOW: + parse_udp6(outer_headers_c, outer_headers_v, fs); + break; + case IPV6_USER_FLOW: + parse_ip6(outer_headers_c, outer_headers_v, fs); + break; + case ETHER_FLOW: + parse_ether(outer_headers_c, outer_headers_v, fs); + break; + default: + return -EINVAL; + } + + if ((fs->flow_type & FLOW_EXT) && + (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) + set_cvlan(outer_headers_c, outer_headers_v, fs->h_ext.vlan_tci); + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) { + mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN); + set_dmac(outer_headers_c, outer_headers_v, fs->m_ext.h_dest, + fs->h_ext.h_dest); + } + + return 0; +} + +static void add_rule_to_list(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *rule) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct list_head *head = ðtool->rules; + struct mlx5e_ethtool_rule *iter; + + list_for_each_entry(iter, ðtool->rules, list) { + if (iter->flow_spec.location > rule->flow_spec.location) + break; + head = &iter->list; + } + ethtool->tot_num_rules++; + list_add(&rule->list, head); +} + +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static int flow_get_tirn(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule, + struct ethtool_rx_flow_spec *fs, + u32 rss_context, u32 *tirn) +{ + if (fs->flow_type & FLOW_RSS) { + struct mlx5e_packet_merge_param pkt_merge_param; + struct mlx5e_rss *rss; + u32 flow_type; + int err; + int tt; + + rss = mlx5e_rx_res_rss_get(priv->rx_res, rss_context); + if (!rss) + return -ENOENT; + + flow_type = flow_type_mask(fs->flow_type); + tt = flow_type_to_traffic_type(flow_type); + if (tt < 0) + return -EINVAL; + + pkt_merge_param = priv->channels.params.packet_merge; + err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn); + if (err) + return err; + eth_rule->rss = rss; + mlx5e_rss_refcnt_inc(eth_rule->rss); + } else { + *tirn = mlx5e_rx_res_get_tirn_direct(priv->rx_res, fs->ring_cookie); + } + + return 0; +} + +static struct mlx5_flow_handle * +add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule, + struct mlx5_flow_table *ft, + struct ethtool_rx_flow_spec *fs, u32 rss_context) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND }; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + err = set_flow_attrs(spec->match_criteria, spec->match_value, + fs); + if (err) + goto free; + + if (fs->ring_cookie == RX_CLS_FLOW_DISC) { + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + } else { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free; + } + + err = flow_get_tirn(priv, eth_rule, fs, rss_context, &dst->tir_num); + if (err) + goto free; + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", + __func__, err); + goto free; + } +free: + kvfree(spec); + kfree(dst); + return err ? ERR_PTR(err) : rule; +} + +static void del_ethtool_rule(struct mlx5e_flow_steering *fs, + struct mlx5e_ethtool_rule *eth_rule) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); + if (eth_rule->rule) + mlx5_del_flow_rules(eth_rule->rule); + if (eth_rule->rss) + mlx5e_rss_refcnt_dec(eth_rule->rss); + list_del(ð_rule->list); + ethtool->tot_num_rules--; + put_flow_table(eth_rule->eth_ft); + kfree(eth_rule); +} + +static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct mlx5e_ethtool_rule *iter; + + list_for_each_entry(iter, ðtool->rules, list) { + if (iter->flow_spec.location == location) + return iter; + } + return NULL; +} + +static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + eth_rule = find_ethtool_rule(priv, location); + if (eth_rule) + del_ethtool_rule(priv->fs, eth_rule); + + eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL); + if (!eth_rule) + return ERR_PTR(-ENOMEM); + + add_rule_to_list(priv, eth_rule); + return eth_rule; +} + +#define MAX_NUM_OF_ETHTOOL_RULES BIT(10) + +#define all_ones(field) (field == (__force typeof(field))-1) +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int validate_ethter(struct ethtool_rx_flow_spec *fs) +{ + struct ethhdr *eth_mask = &fs->m_u.ether_spec; + int ntuples = 0; + + if (!is_zero_ether_addr(eth_mask->h_dest)) + ntuples++; + if (!is_zero_ether_addr(eth_mask->h_source)) + ntuples++; + if (eth_mask->h_proto) + ntuples++; + return ntuples; +} + +static int validate_tcpudp4(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec; + int ntuples = 0; + + if (l4_mask->tos) + return -EINVAL; + + if (l4_mask->ip4src) + ntuples++; + if (l4_mask->ip4dst) + ntuples++; + if (l4_mask->psrc) + ntuples++; + if (l4_mask->pdst) + ntuples++; + /* Flow is TCP/UDP */ + return ++ntuples; +} + +static int validate_ip4(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec; + int ntuples = 0; + + if (l3_mask->l4_4_bytes || l3_mask->tos || + fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4) + return -EINVAL; + if (l3_mask->ip4src) + ntuples++; + if (l3_mask->ip4dst) + ntuples++; + if (l3_mask->proto) + ntuples++; + /* Flow is IPv4 */ + return ++ntuples; +} + +static int validate_ip6(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec; + int ntuples = 0; + + if (l3_mask->l4_4_bytes || l3_mask->tclass) + return -EINVAL; + if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6src)) + ntuples++; + + if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6dst)) + ntuples++; + if (l3_mask->l4_proto) + ntuples++; + /* Flow is IPv6 */ + return ++ntuples; +} + +static int validate_tcpudp6(struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec; + int ntuples = 0; + + if (l4_mask->tclass) + return -EINVAL; + + if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6src)) + ntuples++; + + if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst)) + ntuples++; + + if (l4_mask->psrc) + ntuples++; + if (l4_mask->pdst) + ntuples++; + /* Flow is TCP/UDP */ + return ++ntuples; +} + +static int validate_vlan(struct ethtool_rx_flow_spec *fs) +{ + if (fs->m_ext.vlan_etype || + fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK)) + return -EINVAL; + + if (fs->m_ext.vlan_tci && + (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID)) + return -EINVAL; + + return 1; +} + +static int validate_flow(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + int num_tuples = 0; + int ret = 0; + + if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + if (fs->ring_cookie != RX_CLS_FLOW_DISC) + if (fs->ring_cookie >= priv->channels.params.num_channels) + return -EINVAL; + + switch (flow_type_mask(fs->flow_type)) { + case ETHER_FLOW: + num_tuples += validate_ethter(fs); + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + ret = validate_tcpudp4(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case IP_USER_FLOW: + ret = validate_ip4(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + ret = validate_tcpudp6(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + case IPV6_USER_FLOW: + ret = validate_ip6(fs); + if (ret < 0) + return ret; + num_tuples += ret; + break; + default: + return -ENOTSUPP; + } + if ((fs->flow_type & FLOW_EXT)) { + ret = validate_vlan(fs); + if (ret < 0) + return ret; + num_tuples += ret; + } + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) + num_tuples++; + + return num_tuples; +} + +static int +mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs, u32 rss_context) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_ethtool_rule *eth_rule; + struct mlx5_flow_handle *rule; + int num_tuples; + int err; + + num_tuples = validate_flow(priv, fs); + if (num_tuples <= 0) { + netdev_warn(priv->netdev, "%s: flow is not valid %d\n", + __func__, num_tuples); + return num_tuples; + } + + eth_ft = get_flow_table(priv, fs, num_tuples); + if (IS_ERR(eth_ft)) + return PTR_ERR(eth_ft); + + eth_rule = get_ethtool_rule(priv, fs->location); + if (IS_ERR(eth_rule)) { + put_flow_table(eth_ft); + return PTR_ERR(eth_rule); + } + + eth_rule->flow_spec = *fs; + eth_rule->eth_ft = eth_ft; + + rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto del_ethtool_rule; + } + + eth_rule->rule = rule; + + return 0; + +del_ethtool_rule: + del_ethtool_rule(priv->fs, eth_rule); + + return err; +} + +static int +mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + int err = 0; + + if (location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + eth_rule = find_ethtool_rule(priv, location); + if (!eth_rule) { + err = -ENOENT; + goto out; + } + + del_ethtool_rule(priv->fs, eth_rule); +out: + return err; +} + +static int +mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, int location) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + struct mlx5e_ethtool_rule *eth_rule; + + if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + list_for_each_entry(eth_rule, ðtool->rules, list) { + int index; + + if (eth_rule->flow_spec.location != location) + continue; + if (!info) + return 0; + info->fs = eth_rule->flow_spec; + if (!eth_rule->rss) + return 0; + index = mlx5e_rx_res_rss_index(priv->rx_res, eth_rule->rss); + if (index < 0) + return index; + info->rss_context = index; + return 0; + } + + return -ENOENT; +} + +static int +mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ + int location = 0; + int idx = 0; + int err = 0; + + info->data = MAX_NUM_OF_ETHTOOL_RULES; + while ((!err || err == -ENOENT) && idx < info->rule_cnt) { + err = mlx5e_ethtool_get_flow(priv, NULL, location); + if (!err) + rule_locs[idx++] = location; + location++; + } + return err; +} + +int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool) +{ + *ethtool = kvzalloc(sizeof(**ethtool), GFP_KERNEL); + if (!*ethtool) + return -ENOMEM; + return 0; +} + +void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) +{ + kvfree(ethtool); +} + +void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); + struct mlx5e_ethtool_rule *iter; + struct mlx5e_ethtool_rule *temp; + + list_for_each_entry_safe(iter, temp, ðtool->rules, list) + del_ethtool_rule(fs, iter); +} + +void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(fs); + + INIT_LIST_HEAD(ðtool->rules); +} + +static int flow_type_to_traffic_type(u32 flow_type) +{ + switch (flow_type) { + case TCP_V4_FLOW: + return MLX5_TT_IPV4_TCP; + case TCP_V6_FLOW: + return MLX5_TT_IPV6_TCP; + case UDP_V4_FLOW: + return MLX5_TT_IPV4_UDP; + case UDP_V6_FLOW: + return MLX5_TT_IPV6_UDP; + case AH_V4_FLOW: + return MLX5_TT_IPV4_IPSEC_AH; + case AH_V6_FLOW: + return MLX5_TT_IPV6_IPSEC_AH; + case ESP_V4_FLOW: + return MLX5_TT_IPV4_IPSEC_ESP; + case ESP_V6_FLOW: + return MLX5_TT_IPV6_IPSEC_ESP; + case IPV4_FLOW: + return MLX5_TT_IPV4; + case IPV6_FLOW: + return MLX5_TT_IPV6; + default: + return -EINVAL; + } +} + +static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + u8 rx_hash_field = 0; + u32 flow_type = 0; + u32 rss_idx = 0; + int err; + int tt; + + if (nfc->flow_type & FLOW_RSS) + rss_idx = nfc->rss_context; + + flow_type = flow_type_mask(nfc->flow_type); + tt = flow_type_to_traffic_type(flow_type); + if (tt < 0) + return tt; + + /* RSS does not support anything other than hashing to queues + * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest + * port. + */ + if (flow_type != TCP_V4_FLOW && + flow_type != TCP_V6_FLOW && + flow_type != UDP_V4_FLOW && + flow_type != UDP_V6_FLOW) + return -EOPNOTSUPP; + + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EOPNOTSUPP; + + if (nfc->data & RXH_IP_SRC) + rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP; + if (nfc->data & RXH_IP_DST) + rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP; + if (nfc->data & RXH_L4_B_0_1) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT; + if (nfc->data & RXH_L4_B_2_3) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, rss_idx, tt, rx_hash_field); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + int hash_field = 0; + u32 flow_type = 0; + u32 rss_idx = 0; + int tt; + + if (nfc->flow_type & FLOW_RSS) + rss_idx = nfc->rss_context; + + flow_type = flow_type_mask(nfc->flow_type); + tt = flow_type_to_traffic_type(flow_type); + if (tt < 0) + return tt; + + hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, rss_idx, tt); + if (hash_field < 0) + return hash_field; + + nfc->data = 0; + + if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) + nfc->data |= RXH_IP_SRC; + if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP) + nfc->data |= RXH_IP_DST; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT) + nfc->data |= RXH_L4_B_0_1; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT) + nfc->data |= RXH_L4_B_2_3; + + return 0; +} + +int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) +{ + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx5e_ethtool_flow_replace(priv, &cmd->fs, cmd->rss_context); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); + break; + case ETHTOOL_SRXFH: + err = mlx5e_set_rss_hash_opt(priv, cmd); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs) +{ + struct mlx5e_ethtool_steering *ethtool = mlx5e_fs_get_ethtool(priv->fs); + int err = 0; + + switch (info->cmd) { + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = ethtool->tot_num_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = mlx5e_ethtool_get_flow(priv, info, info->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); + break; + case ETHTOOL_GRXFH: + err = mlx5e_get_rss_hash_opt(priv, info); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c new file mode 100644 index 0000000000..c3961c2bbc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -0,0 +1,6151 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "eswitch.h" +#include "en.h" +#include "en/txrx.h" +#include "en_tc.h" +#include "en_rep.h" +#include "en_accel/ipsec.h" +#include "en_accel/macsec.h" +#include "en_accel/en_accel.h" +#include "en_accel/ktls.h" +#include "lib/vxlan.h" +#include "lib/clock.h" +#include "en/port.h" +#include "en/xdp.h" +#include "lib/eq.h" +#include "en/monitor_stats.h" +#include "en/health.h" +#include "en/params.h" +#include "en/xsk/pool.h" +#include "en/xsk/setup.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" +#include "en/hv_vhca_stats.h" +#include "en/devlink.h" +#include "lib/mlx5.h" +#include "en/ptp.h" +#include "en/htb.h" +#include "qos.h" +#include "en/trap.h" + +bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, + enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u16 umr_wqebbs, max_wqebbs; + bool striding_rq_umr; + + striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); + if (!striding_rq_umr) + return false; + + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode); + max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is + * calculated from mlx5e_get_max_sq_aligned_wqebbs. + */ + if (WARN_ON(umr_wqebbs > max_wqebbs)) + return false; + + return true; +} + +void mlx5e_update_carrier(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 port_state; + bool up; + + port_state = mlx5_query_vport_state(mdev, + MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, + 0); + + up = port_state == VPORT_STATE_UP; + if (up == netif_carrier_ok(priv->netdev)) + netif_carrier_event(priv->netdev); + if (up) { + netdev_info(priv->netdev, "Link up\n"); + netif_carrier_on(priv->netdev); + } else { + netdev_info(priv->netdev, "Link down\n"); + netif_carrier_off(priv->netdev); + } +} + +static void mlx5e_update_carrier_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_carrier_work); + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); + mutex_unlock(&priv->state_lock); +} + +static void mlx5e_update_stats_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_stats_work); + + mutex_lock(&priv->state_lock); + priv->profile->update_stats(priv); + mutex_unlock(&priv->state_lock); +} + +void mlx5e_queue_update_stats(struct mlx5e_priv *priv) +{ + if (!priv->profile->update_stats) + return; + + if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state))) + return; + + queue_work(priv->wq, &priv->update_stats_work); +} + +static int async_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + struct mlx5_eqe *eqe = data; + + if (event != MLX5_EVENT_TYPE_PORT_CHANGE) + return NOTIFY_DONE; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static void mlx5e_enable_async_events(struct mlx5e_priv *priv) +{ + priv->events_nb.notifier_call = async_event; + mlx5_notifier_register(priv->mdev, &priv->events_nb); +} + +static void mlx5e_disable_async_events(struct mlx5e_priv *priv) +{ + mlx5_notifier_unregister(priv->mdev, &priv->events_nb); +} + +static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb); + struct mlx5_devlink_trap_event_ctx *trap_event_ctx = data; + int err; + + switch (event) { + case MLX5_DRIVER_EVENT_TYPE_TRAP: + err = mlx5e_handle_trap_event(priv, trap_event_ctx->trap); + if (err) { + trap_event_ctx->err = err; + return NOTIFY_BAD; + } + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv) +{ + priv->blocking_events_nb.notifier_call = blocking_event; + mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb); +} + +static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) +{ + mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb); +} + +static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) +{ + u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u32 sz; + + sz = ALIGN(entries * umr_entry_size, MLX5_UMR_FLEX_ALIGNMENT); + + return sz / MLX5_OCTWORD; +} + +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + struct mlx5e_umr_wqe *wqe) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + u16 octowords; + u8 ds_cnt; + + ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode), + MLX5_SEND_WQE_DS); + + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->umr_mkey = rq->mpwqe.umr_mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; + octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode); + ucseg->xlt_octowords = cpu_to_be16(octowords); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); +} + +static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node) +{ + rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), + GFP_KERNEL, node); + if (!rq->mpwqe.shampo) + return -ENOMEM; + return 0; +} + +static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo); +} + +static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + + shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, + node); + shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq, + sizeof(*shampo->info)), + GFP_KERNEL, node); + shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq, + sizeof(*shampo->pages)), + GFP_KERNEL, node); + if (!shampo->bitmap || !shampo->info || !shampo->pages) + goto err_nomem; + + return 0; + +err_nomem: + kvfree(shampo->info); + kvfree(shampo->bitmap); + kvfree(shampo->pages); + + return -ENOMEM; +} + +static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo->bitmap); + kvfree(rq->mpwqe.shampo->info); + kvfree(rq->mpwqe.shampo->pages); +} + +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + size_t alloc_size; + + alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, + alloc_units.frag_pages, + rq->mpwqe.pages_per_wqe)); + + rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); + if (!rq->mpwqe.info) + return -ENOMEM; + + /* For deferred page release (release right before alloc), make sure + * that on first round release is not called. + */ + for (int i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, i); + + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + } + + mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); + + return 0; +} + + +static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode) +{ + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + return MLX5_MKC_ACCESS_MODE_MTT; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + return MLX5_MKC_ACCESS_MODE_KSM; + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + return MLX5_MKC_ACCESS_MODE_KLMS; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + return MLX5_MKC_ACCESS_MODE_KSM; + } + WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode); + return 0; +} + +static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, + u32 npages, u8 page_shift, u32 *umr_mkey, + dma_addr_t filler_addr, + enum mlx5e_mpwrq_umr_mode umr_mode, + u32 xsk_chunk_size) +{ + struct mlx5_mtt *mtt; + struct mlx5_ksm *ksm; + struct mlx5_klm *klm; + u32 octwords; + int inlen; + void *mkc; + u32 *in; + int err; + int i; + + if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED || + umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) && + !MLX5_CAP_GEN(mdev, fixed_buffer_size)) { + mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n"); + return -EINVAL; + } + + octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode); + + inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in), + MLX5_OCTWORD, octwords); + if (inlen < 0) + return inlen; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode)); + mlx5e_mkey_set_relaxed_ordering(mdev, mkc); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET64(mkc, mkc, len, npages << page_shift); + MLX5_SET(mkc, mkc, translations_octword_size, octwords); + if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) + MLX5_SET(mkc, mkc, log_page_size, page_shift - 2); + else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED) + MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords); + + /* Initialize the mkey with all MTTs pointing to a default + * page (filler_addr). When the channels are activated, UMR + * WQEs will redirect the RX WQEs to the actual memory from + * the RQ's pool, while the gaps (wqe_overflow) remain mapped + * to the default page. + */ + switch (umr_mode) { + case MLX5E_MPWRQ_UMR_MODE_OVERSIZED: + klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) { + klm[i << 1] = (struct mlx5_klm) { + .va = cpu_to_be64(filler_addr), + .bcount = cpu_to_be32(xsk_chunk_size), + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + }; + klm[(i << 1) + 1] = (struct mlx5_klm) { + .va = cpu_to_be64(filler_addr), + .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size), + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + }; + } + break; + case MLX5E_MPWRQ_UMR_MODE_UNALIGNED: + ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) + ksm[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + .va = cpu_to_be64(filler_addr), + }; + break; + case MLX5E_MPWRQ_UMR_MODE_ALIGNED: + mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages; i++) + mtt[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(filler_addr), + }; + break; + case MLX5E_MPWRQ_UMR_MODE_TRIPLE: + ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < npages * 4; i++) { + ksm[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey), + .va = cpu_to_be64(filler_addr), + }; + } + break; + } + + err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, + u64 nentries, + u32 *umr_mkey) +{ + int inlen; + void *mkc; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); + mlx5e_mkey_set_relaxed_ordering(mdev, mkc); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(mkc, mkc, translations_octword_size, nentries); + MLX5_SET(mkc, mkc, length64, 1); + err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); + + kvfree(in); + return err; +} + +static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) +{ + u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0; + u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + u32 num_entries, max_num_entries; + u32 umr_mkey; + int err; + + max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode); + + /* Shouldn't overflow, the result is at most MLX5E_MAX_RQ_NUM_MTTS. */ + if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe, + &num_entries) || + num_entries > max_num_entries)) + mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n", + __func__, wq_size, rq->mpwqe.mtts_per_wqe, + max_num_entries); + + err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift, + &umr_mkey, rq->wqe_overflow.addr, + rq->mpwqe.umr_mode, xsk_chunk_size); + rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey); + return err; +} + +static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq) +{ + u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + + if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) { + mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", + max_klm_size, rq->mpwqe.shampo->hd_per_wq); + return -EINVAL; + } + return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, + &rq->mpwqe.shampo->mkey); +} + +static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) +{ + struct mlx5e_wqe_frag_info next_frag = {}; + struct mlx5e_wqe_frag_info *prev = NULL; + int i; + + WARN_ON(rq->xsk_pool); + + next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0]; + + /* Skip first release due to deferred release. */ + next_frag.flags = BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { + struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; + struct mlx5e_wqe_frag_info *frag = + &rq->wqe.frags[i << rq->wqe.info.log_num_frags]; + int f; + + for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { + if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { + /* Pages are assigned at runtime. */ + next_frag.frag_page++; + next_frag.offset = 0; + if (prev) + prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); + } + *frag = next_frag; + + /* prepare next */ + next_frag.offset += frag_info[f].frag_stride; + prev = frag; + } + } + + if (prev) + prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); +} + +static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq) +{ + int i; + + /* Assumptions used by XSK batched allocator. */ + WARN_ON(rq->wqe.info.num_frags != 1); + WARN_ON(rq->wqe.info.log_num_frags != 0); + WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); + + /* Considering the above assumptions a fragment maps to a single + * xsk_buff. + */ + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { + rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i]; + + /* Skip first release due to deferred release as WQES are + * not allocated yet. + */ + rq->wqe.frags[i].flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } +} + +static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node) +{ + int wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + int len = wq_sz << rq->wqe.info.log_num_frags; + struct mlx5e_wqe_frag_info *frags; + union mlx5e_alloc_units *aus; + int aus_sz; + + if (rq->xsk_pool) + aus_sz = sizeof(*aus->xsk_buffs); + else + aus_sz = sizeof(*aus->frag_pages); + + aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node); + if (!aus) + return -ENOMEM; + + frags = kvzalloc_node(array_size(len, sizeof(*frags)), GFP_KERNEL, node); + if (!frags) { + kvfree(aus); + return -ENOMEM; + } + + rq->wqe.alloc_units = aus; + rq->wqe.frags = frags; + + if (rq->xsk_pool) + mlx5e_init_xsk_buffs(rq); + else + mlx5e_init_frags_partition(rq); + + return 0; +} + +static void mlx5e_free_wqe_alloc_info(struct mlx5e_rq *rq) +{ + kvfree(rq->wqe.frags); + kvfree(rq->wqe.alloc_units); +} + +static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work); + + mlx5e_reporter_rq_cqe_err(rq); +} + +static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) +{ + rq->wqe_overflow.page = alloc_page(GFP_KERNEL); + if (!rq->wqe_overflow.page) + return -ENOMEM; + + rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0, + PAGE_SIZE, rq->buff.map_dir); + if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) { + __free_page(rq->wqe_overflow.page); + return -ENOMEM; + } + return 0; +} + +static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) +{ + dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE, + rq->buff.map_dir); + __free_page(rq->wqe_overflow.page); +} + +static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + u32 xdp_frag_size, struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->channel = c; + rq->mdev = mdev; + rq->hw_mtu = + MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en; + rq->xdpsq = &c->rq_xdpsq; + rq->stats = &c->priv->channel_stats[c->ix]->rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, NULL); + if (err) + return err; + + return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id, + xdp_frag_size); +} + +static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp, + struct mlx5e_rq *rq, + u32 *pool_size, + int node) +{ + void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + int wq_size; + int err; + + if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + return 0; + err = mlx5e_rq_shampo_hd_alloc(rq, node); + if (err) + goto out; + rq->mpwqe.shampo->hd_per_wq = + mlx5e_shampo_hd_per_wq(mdev, params, rqp); + err = mlx5e_create_rq_hd_umr_mkey(mdev, rq); + if (err) + goto err_shampo_hd; + err = mlx5e_rq_shampo_hd_info_alloc(rq, node); + if (err) + goto err_shampo_info; + rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); + if (!rq->hw_gro_data) { + err = -ENOMEM; + goto err_hw_gro_data; + } + rq->mpwqe.shampo->key = + cpu_to_be32(rq->mpwqe.shampo->mkey); + rq->mpwqe.shampo->hd_per_wqe = + mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) / + MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + return 0; + +err_hw_gro_data: + mlx5e_rq_shampo_hd_info_free(rq); +err_shampo_info: + mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey); +err_shampo_hd: + mlx5e_rq_shampo_hd_free(rq); +out: + return err; +} + +static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) +{ + if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + return; + + kvfree(rq->hw_gro_data); + mlx5e_rq_shampo_hd_info_free(rq); + mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey); + mlx5e_rq_shampo_hd_free(rq); +} + +static int mlx5e_alloc_rq(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *rqp, + int node, struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = rq->mdev; + void *rqc = rqp->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 pool_size; + int wq_sz; + int err; + int i; + + rqp->wq.db_numa_node = node; + INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); + + if (params->xdp_prog) + bpf_prog_inc(params->xdp_prog); + RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); + + rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); + pool_size = 1 << params->log_rq_mtu_frames; + + rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); + + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, + &rq->wq_ctrl); + if (err) + goto err_rq_xdp_prog; + + err = mlx5e_alloc_mpwqe_rq_drop_page(rq); + if (err) + goto err_rq_wq_destroy; + + rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + + rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); + rq->mpwqe.pages_per_wqe = + mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + rq->mpwqe.umr_wqebbs = + mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + rq->mpwqe.mtts_per_wqe = + mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift, + rq->mpwqe.umr_mode); + + pool_size = rq->mpwqe.pages_per_wqe << + mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk); + + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk) && params->xdp_prog) + pool_size *= 2; /* additional page per packet for the linear part */ + + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + rq->mpwqe.num_strides = + BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); + rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz); + + rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz); + + err = mlx5e_create_rq_umr_mkey(mdev, rq); + if (err) + goto err_rq_drop_page; + + err = mlx5e_rq_alloc_mpwqe_info(rq, node); + if (err) + goto err_rq_mkey; + + err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node); + if (err) + goto err_free_mpwqe_info; + + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, + &rq->wq_ctrl); + if (err) + goto err_rq_xdp_prog; + + rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + + rq->wqe.info = rqp->frags_info; + rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; + + err = mlx5e_init_wqe_alloc_info(rq, node); + if (err) + goto err_rq_wq_destroy; + } + + if (xsk) { + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, NULL); + xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); + } else { + /* Create a page_pool and register it with rxq */ + struct page_pool_params pp_params = { 0 }; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG; + pp_params.pool_size = pool_size; + pp_params.nid = node; + pp_params.dev = rq->pdev; + pp_params.napi = rq->cq.napi; + pp_params.dma_dir = rq->buff.map_dir; + pp_params.max_len = PAGE_SIZE; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handle + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free_by_rq_type; + } + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); + } + if (err) + goto err_destroy_page_pool; + + for (i = 0; i < wq_sz; i++) { + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + struct mlx5e_rx_wqe_ll *wqe = + mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); + u32 byte_count = + rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; + u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) << + rq->mpwqe.page_shift; + u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ? + 0 : rq->buff.headroom; + + wqe->data[0].addr = cpu_to_be64(dma_offset + headroom); + wqe->data[0].byte_count = cpu_to_be32(byte_count); + wqe->data[0].lkey = rq->mpwqe.umr_mkey_be; + } else { + struct mlx5e_rx_wqe_cyc *wqe = + mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); + int f; + + for (f = 0; f < rq->wqe.info.num_frags; f++) { + u32 frag_size = rq->wqe.info.arr[f].frag_size | + MLX5_HW_START_PADDING; + + wqe->data[f].byte_count = cpu_to_be32(frag_size); + wqe->data[f].lkey = rq->mkey_be; + } + /* check if num_frags is not a pow of two */ + if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { + wqe->data[f].byte_count = 0; + wqe->data[f].lkey = params->terminate_lkey_be; + wqe->data[f].addr = 0; + } + } + } + + INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work); + + switch (params->rx_cq_moderation.cq_period_mode) { + case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; + break; + case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: + default: + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } + + return 0; + +err_destroy_page_pool: + page_pool_destroy(rq->page_pool); +err_free_by_rq_type: + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + mlx5e_rq_free_shampo(rq); +err_free_mpwqe_info: + kvfree(rq->mpwqe.info); +err_rq_mkey: + mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); +err_rq_drop_page: + mlx5e_free_mpwqe_rq_drop_page(rq); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + mlx5e_free_wqe_alloc_info(rq); + } +err_rq_wq_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); +err_rq_xdp_prog: + if (params->xdp_prog) + bpf_prog_put(params->xdp_prog); + + return err; +} + +static void mlx5e_free_rq(struct mlx5e_rq *rq) +{ + struct bpf_prog *old_prog; + + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { + old_prog = rcu_dereference_protected(rq->xdp_prog, + lockdep_is_held(&rq->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); + } + + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kvfree(rq->mpwqe.info); + mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be)); + mlx5e_free_mpwqe_rq_drop_page(rq); + mlx5e_rq_free_shampo(rq); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + mlx5e_free_wqe_alloc_info(rq); + } + + xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); + mlx5_wq_destroy(&rq->wq_ctrl); +} + +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +{ + struct mlx5_core_dev *mdev = rq->mdev; + u8 ts_format; + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * rq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + ts_format = mlx5_is_real_time_rq(mdev) ? + MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + memcpy(rqc, param->rqc, sizeof(param->rqc)); + + MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, ts_format, ts_format); + MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + MLX5_SET(wq, wq, log_headers_buffer_entry_num, + order_base_2(rq->mpwqe.shampo->hd_per_wq)); + MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey); + } + + mlx5_fill_page_frag_array(&rq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) +{ + struct mlx5_core_dev *mdev = rq->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) + mlx5e_rqwq_reset(rq); + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in); + + kvfree(in); + + return err; +} + +static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq) +{ + struct mlx5_cqwq *cqwq = &rq->cq.wq; + struct mlx5_cqe64 *cqe; + + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) { + while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq))) + mlx5_cqwq_pop(cqwq); + } else { + while ((cqe = mlx5_cqwq_get_cqe(cqwq))) + mlx5_cqwq_pop(cqwq); + } + + mlx5_cqwq_update_db_record(cqwq); +} + +int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) +{ + struct net_device *dev = rq->netdev; + int err; + + err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); + return err; + } + + mlx5e_free_rx_descs(rq); + mlx5e_flush_rq_cq(rq); + + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) { + netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); + return err; + } + + return 0; +} + +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5_core_dev *mdev = rq->mdev; + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); + MLX5_SET(rqc, rqc, vsd, vsd); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in); + + kvfree(in); + + return err; +} + +void mlx5e_destroy_rq(struct mlx5e_rq *rq) +{ + mlx5_core_destroy_rq(rq->mdev, rq->rqn); +} + +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); + + u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq)); + + do { + if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes) + return 0; + + msleep(20); + } while (time_before(jiffies, exp_time)); + + netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", + rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); + + mlx5e_reporter_rx_timeout(rq); + return -ETIMEDOUT; +} + +void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq; + u16 head; + int i; + + if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return; + + wq = &rq->mpwqe.wq; + head = wq->head; + + /* Release WQEs that are in missing state: they have been + * popped from the list after completion but were not freed + * due to deferred release. + * Also free the linked-list reserved entry, hence the "+ 1". + */ + for (i = 0; i < mlx5_wq_ll_missing(wq) + 1; i++) { + rq->dealloc_wqe(rq, head); + head = mlx5_wq_ll_get_wqe_next_ix(wq, head); + } + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + u16 len; + + len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) & + (rq->mpwqe.shampo->hd_per_wq - 1); + mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false); + rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci; + } + + rq->mpwqe.actual_wq_head = wq->head; + rq->mpwqe.umr_in_progress = 0; + rq->mpwqe.umr_completed = 0; +} + +void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +{ + __be16 wqe_ix_be; + u16 wqe_ix; + + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + + mlx5e_free_rx_missing_descs(rq); + + while (!mlx5_wq_ll_is_empty(wq)) { + struct mlx5e_rx_wqe_ll *wqe; + + wqe_ix_be = *wq->tail_next; + wqe_ix = be16_to_cpu(wqe_ix_be); + wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_ll_pop(wq, wqe_ix_be, + &wqe->next.next_wqe_index); + } + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq, + 0, true); + } else { + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + u16 missing = mlx5_wq_cyc_missing(wq); + u16 head = mlx5_wq_cyc_get_head(wq); + + while (!mlx5_wq_cyc_is_empty(wq)) { + wqe_ix = mlx5_wq_cyc_get_tail(wq); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_cyc_pop(wq); + } + /* Missing slots might also contain unreleased pages due to + * deferred release. + */ + while (missing--) { + wqe_ix = mlx5_wq_cyc_ctr2ix(wq, head++); + rq->dealloc_wqe(rq, wqe_ix); + } + } + +} + +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = rq->mdev; + int err; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state); + + err = mlx5e_alloc_rq(params, xsk, param, node, rq); + if (err) + return err; + + err = mlx5e_create_rq(rq, param); + if (err) + goto err_free_rq; + + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + goto err_destroy_rq; + + if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) + __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); + + if (params->rx_dim_enabled) + __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); + + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state); + + /* For CQE compression on striding RQ, use stride index provided by + * HW if capability is supported. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state); + + /* For enhanced CQE compression packet processing. decompress + * session according to the enhanced layout. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) && + MLX5_CAP_GEN(mdev, enhanced_cqe_compression)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state); + + return 0; + +err_destroy_rq: + mlx5e_destroy_rq(rq); +err_free_rq: + mlx5e_free_rq(rq); + + return err; +} + +void mlx5e_activate_rq(struct mlx5e_rq *rq) +{ + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); +} + +void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +{ + clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ +} + +void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + cancel_work_sync(&rq->dim.work); + cancel_work_sync(&rq->recover_work); + mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); + mlx5e_free_rq(rq); +} + +static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) +{ + kvfree(sq->db.xdpi_fifo.xi); + kvfree(sq->db.wqe_info); +} + +static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) +{ + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int entries; + size_t size; + + /* upper bound for maximum num of entries of all xmit_modes. */ + entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS * + MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO); + + size = array_size(sizeof(*xdpi_fifo->xi), entries); + xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa); + if (!xdpi_fifo->xi) + return -ENOMEM; + + xdpi_fifo->pc = &sq->xdpi_fifo_pc; + xdpi_fifo->cc = &sq->xdpi_fifo_cc; + xdpi_fifo->mask = entries - 1; + + return 0; +} + +static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + size_t size; + int err; + + size = array_size(sizeof(*sq->db.wqe_info), wq_sz); + sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa); + if (!sq->db.wqe_info) + return -ENOMEM; + + err = mlx5e_alloc_xdpsq_fifo(sq, numa); + if (err) { + mlx5e_free_xdpsq_db(sq); + return err; + } + + return 0; +} + +static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct xsk_buff_pool *xsk_pool, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq, + bool is_redirect) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; + sq->xsk_pool = xsk_pool; + + sq->stats = sq->xsk_pool ? + &c->priv->channel_stats[c->ix]->xsksq : + is_redirect ? + &c->priv->channel_stats[c->ix]->xdpsq : + &c->priv->channel_stats[c->ix]->rq_xdpsq; + sq->stop_room = param->is_mpw ? mlx5e_stop_room_for_mpwqe(mdev) : + mlx5e_stop_room_for_max_wqe(mdev); + sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq) +{ + mlx5e_free_xdpsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) +{ + kvfree(sq->db.wqe_info); +} + +static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + size_t size; + + size = array_size(wq_sz, sizeof(*sq->db.wqe_info)); + sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa); + if (!sq->db.wqe_info) + return -ENOMEM; + + return 0; +} + +static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + mlx5e_reporter_icosq_cqe_err(sq); +} + +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + +static int mlx5e_alloc_icosq(struct mlx5e_channel *c, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq, + work_func_t recover_work_func) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->reserved_room = param->stop_room; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + INIT_WORK(&sq->recover_work, recover_work_func); + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_icosq(struct mlx5e_icosq *sq) +{ + mlx5e_free_icosq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) +{ + kvfree(sq->db.wqe_info); + kvfree(sq->db.skb_fifo.fifo); + kvfree(sq->db.dma_fifo); +} + +int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, + sizeof(*sq->db.dma_fifo)), + GFP_KERNEL, numa); + sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz, + sizeof(*sq->db.skb_fifo.fifo)), + GFP_KERNEL, numa); + sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, + sizeof(*sq->db.wqe_info)), + GFP_KERNEL, numa); + if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) { + mlx5e_free_txqsq_db(sq); + return -ENOMEM; + } + + sq->dma_fifo_mask = df_sz - 1; + + sq->db.skb_fifo.pc = &sq->skb_fifo_pc; + sq->db.skb_fifo.cc = &sq->skb_fifo_cc; + sq->db.skb_fifo.mask = df_sz - 1; + + return 0; +} + +static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, + int tc) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_wq_cyc *wq = &sq->wq; + int err; + + sq->pdev = c->pdev; + sq->clock = &mdev->clock; + sq->mkey_be = c->mkey_be; + sq->netdev = c->netdev; + sq->mdev = c->mdev; + sq->channel = c; + sq->priv = c->priv; + sq->ch_ix = c->ix; + sq->txq_ix = txq_ix; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); + if (mlx5_ipsec_device_caps(c->priv->mdev)) + set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); + if (param->is_mpw) + set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); + sq->stop_room = param->stop_room; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work); + sq->dim.mode = params->tx_cq_moderation.cq_period_mode; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) +{ + mlx5e_free_txqsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static int mlx5e_create_sq(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) +{ + u8 ts_format; + void *in; + void *sqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * csp->wq_ctrl->buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + ts_format = mlx5_is_real_time_sq(mdev) ? + MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, param->sqc, sizeof(param->sqc)); + MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz); + MLX5_SET(sqc, sqc, tis_num_0, csp->tisn); + MLX5_SET(sqc, sqc, cqn, csp->cqn); + MLX5_SET(sqc, sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn); + MLX5_SET(sqc, sqc, ts_format, ts_format); + + + if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) + MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); + + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); + + mlx5_fill_page_frag_array(&csp->wq_ctrl->buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, sqn); + + kvfree(in); + + return err; +} + +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) +{ + u64 bitmask = 0; + void *in; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); + MLX5_SET(sqc, sqc, state, p->next_state); + if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { + bitmask |= 1; + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); + } + if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) { + bitmask |= 1 << 2; + MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id); + } + MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask); + + err = mlx5_core_modify_sq(mdev, sqn, in); + + kvfree(in); + + return err; +} + +static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) +{ + mlx5_core_destroy_sq(mdev, sqn); +} + +int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, + u32 *sqn) +{ + struct mlx5e_modify_sq_param msp = {0}; + int err; + + err = mlx5e_create_sq(mdev, param, csp, sqn); + if (err) + return err; + + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + if (qos_queue_group_id) { + msp.qos_update = true; + msp.qos_queue_group_id = qos_queue_group_id; + } + err = mlx5e_modify_sq(mdev, *sqn, &msp); + if (err) + mlx5e_destroy_sq(mdev, *sqn); + + return err; +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate); + +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, + struct mlx5e_sq_stats *sq_stats) +{ + struct mlx5e_create_sq_param csp = {}; + u32 tx_rate; + int err; + + err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc); + if (err) + return err; + + sq->stats = sq_stats; + + csp.tisn = tisn; + csp.tis_lst_sz = 1; + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); + if (err) + goto err_free_txqsq; + + tx_rate = c->priv->tx_rates[sq->txq_ix]; + if (tx_rate) + mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); + + if (params->tx_dim_enabled) + sq->state |= BIT(MLX5E_SQ_STATE_DIM); + + return 0; + +err_free_txqsq: + mlx5e_free_txqsq(sq); + + return err; +} + +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +{ + sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix); + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); +} + +void mlx5e_tx_disable_queue(struct netdev_queue *txq) +{ + __netif_tx_lock_bh(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock_bh(txq); +} + +void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ + + mlx5e_tx_disable_queue(sq->txq); + + /* last doorbell out, godspeed .. */ + if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_tx_wqe *nop; + + sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl); + } +} + +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->mdev; + struct mlx5_rate_limit rl = {0}; + + cancel_work_sync(&sq->dim.work); + cancel_work_sync(&sq->recover_work); + mlx5e_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) { + rl.rate = sq->rate_limit; + mlx5_rl_remove_rate(mdev, &rl); + } + mlx5e_free_txqsq_descs(sq); + mlx5e_free_txqsq(sq); +} + +void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, + recover_work); + + mlx5e_reporter_tx_err_cqe(sq); +} + +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) +{ + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); + if (err) + return err; + + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = params->tx_min_inline_mode; + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); + if (err) + goto err_free_icosq; + + if (param->is_tls) { + sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list(); + if (IS_ERR(sq->ktls_resync)) { + err = PTR_ERR(sq->ktls_resync); + goto err_destroy_icosq; + } + } + return 0; + +err_destroy_icosq: + mlx5e_destroy_sq(c->mdev, sq->sqn); +err_free_icosq: + mlx5e_free_icosq(sq); + + return err; +} + +void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) +{ + set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); +} + +void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) +{ + clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); + synchronize_net(); /* Sync with NAPI. */ +} + +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + if (sq->ktls_resync) + mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync); + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_icosq_descs(sq); + mlx5e_free_icosq(sq); +} + +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, + struct mlx5e_xdpsq *sq, bool is_redirect) +{ + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect); + if (err) + return err; + + csp.tis_lst_sz = 1; + csp.tisn = c->priv->tisn[c->lag_port][0]; /* tc = 0 */ + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + + if (param->is_xdp_mb) + set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); + + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); + if (err) + goto err_free_xdpsq; + + mlx5e_set_xmit_fp(sq, param->is_mpw); + + if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { + unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; + unsigned int inline_hdr_sz = 0; + int i; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } + + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + + sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = 1, + .num_pkts = 1, + }; + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + } + } + + return 0; + +err_free_xdpsq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_xdpsq(sq); + + return err; +} + +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + synchronize_net(); /* Sync with NAPI. */ + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq(sq); +} + +static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + int err; + u32 i; + + err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + return err; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + *mcq->set_ci_db = 0; + *mcq->arm_db = 0; + mcq->vector = param->eq_ix; + mcq->comp = mlx5e_completion_event; + mcq->event = mlx5e_cq_error_event; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + cqe->validity_iteration_count = 0xff; + } + + cq->mdev = mdev; + cq->netdev = priv->netdev; + cq->priv = priv; + + return 0; +} + +static int mlx5e_alloc_cq(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param, + struct mlx5e_create_cq_param *ccp, + struct mlx5e_cq *cq) +{ + int err; + + param->wq.buf_numa_node = ccp->node; + param->wq.db_numa_node = ccp->node; + param->eq_ix = ccp->ix; + + err = mlx5e_alloc_cq_common(priv, param, cq); + + cq->napi = ccp->napi; + cq->ch_stats = ccp->ch_stats; + + return err; +} + +static void mlx5e_free_cq(struct mlx5e_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +{ + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_core_dev *mdev = cq->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + + void *in; + void *cqc; + int inlen; + int eqn; + int err; + + err = mlx5_comp_eqn_get(mdev, param->eq_ix, &eqn); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, param->cqc, sizeof(param->cqc)); + + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); + + kvfree(in); + + if (err) + return err; + + mlx5e_cq_arm(cq); + + return 0; +} + +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); +} + +int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5e_alloc_cq(priv, param, ccp, cq); + if (err) + return err; + + err = mlx5e_create_cq(cq, param); + if (err) + goto err_free_cq; + + if (MLX5_CAP_GEN(mdev, cq_moderation)) + mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts); + return 0; + +err_free_cq: + mlx5e_free_cq(cq); + + return err; +} + +void mlx5e_close_cq(struct mlx5e_cq *cq) +{ + mlx5e_destroy_cq(cq); + mlx5e_free_cq(cq); +} + +static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_create_cq_param *ccp, + struct mlx5e_channel_param *cparam) +{ + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->txq_sq.cqp, + ccp, &c->sq[tc].cq); + if (err) + goto err_close_tx_cqs; + } + + return 0; + +err_close_tx_cqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_cq(&c->sq[tc].cq); + + return err; +} + +static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->sq[tc].cq); +} + +static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count) + return tc; + + WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq); + return -ENOENT; +} + +static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix, + u32 *hw_id) +{ + int tc; + + if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) { + *hw_id = 0; + return 0; + } + + tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix); + if (tc < 0) + return tc; + + if (tc >= params->mqprio.num_tc) { + WARN(1, "Unexpected TCs configuration. tc %d is out of range of %u", + tc, params->mqprio.num_tc); + return -EINVAL; + } + + *hw_id = params->mqprio.channel.hw_id[tc]; + return 0; +} + +static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) +{ + int err, tc; + + for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) { + int txq_ix = c->ix + tc * params->num_channels; + u32 qos_queue_group_id; + + err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id); + if (err) + goto err_close_sqs; + + err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, + params, &cparam->txq_sq, &c->sq[tc], tc, + qos_queue_group_id, + &c->priv->channel_stats[c->ix]->sq[tc]); + if (err) + goto err_close_sqs; + } + + return 0; + +err_close_sqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_txqsq(&c->sq[tc]); + + return err; +} + +static void mlx5e_close_sqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_txqsq(&c->sq[tc]); +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_modify_sq_param msp = {0}; + struct mlx5_rate_limit rl = {0}; + u16 rl_index = 0; + int err; + + if (rate == sq->rate_limit) + /* nothing to do */ + return 0; + + if (sq->rate_limit) { + rl.rate = sq->rate_limit; + /* remove current rl index to free space to next ones */ + mlx5_rl_remove_rate(mdev, &rl); + } + + sq->rate_limit = 0; + + if (rate) { + rl.rate = rate; + err = mlx5_rl_add_rate(mdev, &rl_index, &rl); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + return err; + } + } + + msp.curr_state = MLX5_SQC_STATE_RDY; + msp.next_state = MLX5_SQC_STATE_RDY; + msp.rl_index = rl_index; + msp.rl_update = true; + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + /* remove the rate from the table */ + if (rate) + mlx5_rl_remove_rate(mdev, &rl); + return err; + } + + sq->rate_limit = rate; + return 0; +} + +static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_txqsq *sq = priv->txq2sq[index]; + int err = 0; + + if (!mlx5_rl_is_supported(mdev)) { + netdev_err(dev, "Rate limiting is not supported on this device\n"); + return -EINVAL; + } + + /* rate is given in Mb/sec, HW config is in Kb/sec */ + rate = rate << 10; + + /* Check whether rate in valid range, 0 is always valid */ + if (rate && !mlx5_rl_is_in_range(mdev, rate)) { + netdev_err(dev, "TX rate %u, is not in range\n", rate); + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + err = mlx5e_set_sq_maxrate(dev, sq, rate); + if (!err) + priv->tx_rates[index] = rate; + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params) +{ + int err; + + err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq); +} + +static int mlx5e_open_queues(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) +{ + struct dim_cq_moder icocq_moder = {0, 0}; + struct mlx5e_create_cq_param ccp; + int err; + + mlx5e_build_create_cq_param(&ccp, c); + + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp, + &c->async_icosq.cq); + if (err) + return err; + + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp, + &c->icosq.cq); + if (err) + goto err_close_async_icosq_cq; + + err = mlx5e_open_tx_cqs(c, params, &ccp, cparam); + if (err) + goto err_close_icosq_cq; + + err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp, + &c->xdpsq.cq); + if (err) + goto err_close_tx_cqs; + + err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, + &c->rq.cq); + if (err) + goto err_close_xdp_tx_cqs; + + err = c->xdp ? mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, + &ccp, &c->rq_xdpsq.cq) : 0; + if (err) + goto err_close_rx_cq; + + spin_lock_init(&c->async_icosq_lock); + + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); + if (err) + goto err_close_xdpsq_cq; + + mutex_init(&c->icosq_recovery_lock); + + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); + if (err) + goto err_close_async_icosq; + + err = mlx5e_open_sqs(c, params, cparam); + if (err) + goto err_close_icosq; + + err = mlx5e_open_rxq_rq(c, params, &cparam->rq); + if (err) + goto err_close_sqs; + + if (c->xdp) { + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, + &c->rq_xdpsq, false); + if (err) + goto err_close_rq; + } + + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); + if (err) + goto err_close_xdp_sq; + + return 0; + +err_close_xdp_sq: + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + +err_close_rq: + mlx5e_close_rq(&c->rq); + +err_close_sqs: + mlx5e_close_sqs(c); + +err_close_icosq: + mlx5e_close_icosq(&c->icosq); + +err_close_async_icosq: + mlx5e_close_icosq(&c->async_icosq); + +err_close_xdpsq_cq: + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + +err_close_rx_cq: + mlx5e_close_cq(&c->rq.cq); + +err_close_xdp_tx_cqs: + mlx5e_close_cq(&c->xdpsq.cq); + +err_close_tx_cqs: + mlx5e_close_tx_cqs(c); + +err_close_icosq_cq: + mlx5e_close_cq(&c->icosq.cq); + +err_close_async_icosq_cq: + mlx5e_close_cq(&c->async_icosq.cq); + + return err; +} + +static void mlx5e_close_queues(struct mlx5e_channel *c) +{ + mlx5e_close_xdpsq(&c->xdpsq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */ + cancel_work_sync(&c->icosq.recover_work); + mlx5e_close_rq(&c->rq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); + mutex_destroy(&c->icosq_recovery_lock); + mlx5e_close_icosq(&c->async_icosq); + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_cq(&c->xdpsq.cq); + mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); + mlx5e_close_cq(&c->async_icosq.cq); +} + +static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) +{ + u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id); + + return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); +} + +static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) +{ + if (ix > priv->stats_nch) { + netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix, + priv->stats_nch); + return -EINVAL; + } + + if (priv->channel_stats[ix]) + return 0; + + /* Asymmetric dynamic memory allocation. + * Freed in mlx5e_priv_arrays_free, not on channel closure. + */ + netdev_dbg(priv->netdev, "Creating channel stats %d\n", ix); + priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), + GFP_KERNEL, cpu_to_node(cpu)); + if (!priv->channel_stats[ix]) + return -ENOMEM; + priv->stats_nch++; + + return 0; +} + +void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c) +{ + spin_lock_bh(&c->async_icosq_lock); + mlx5e_trigger_irq(&c->async_icosq); + spin_unlock_bh(&c->async_icosq_lock); +} + +void mlx5e_trigger_napi_sched(struct napi_struct *napi) +{ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam, + struct xsk_buff_pool *xsk_pool, + struct mlx5e_channel **cp) +{ + int cpu = mlx5_comp_vector_get_cpu(priv->mdev, ix); + struct net_device *netdev = priv->netdev; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + unsigned int irq; + int err; + + err = mlx5_comp_irqn_get(priv->mdev, ix, &irq); + if (err) + return err; + + err = mlx5e_channel_stats_alloc(priv, ix, cpu); + if (err) + return err; + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->ix = ix; + c->cpu = cpu; + c->pdev = mlx5_core_dma_dev(priv->mdev); + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); + c->num_tc = mlx5e_get_dcb_num_tc(params); + c->xdp = !!params->xdp_prog; + c->stats = &priv->channel_stats[ix]->ch; + c->aff_mask = irq_get_effective_affinity_mask(irq); + c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll); + + err = mlx5e_open_queues(c, params, cparam); + if (unlikely(err)) + goto err_napi_del; + + if (xsk_pool) { + mlx5e_build_xsk_param(xsk_pool, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c); + if (unlikely(err)) + goto err_close_queues; + } + + *cp = c; + + return 0; + +err_close_queues: + mlx5e_close_queues(c); + +err_napi_del: + netif_napi_del(&c->napi); + + kvfree(c); + + return err; +} + +static void mlx5e_activate_channel(struct mlx5e_channel *c) +{ + int tc; + + napi_enable(&c->napi); + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->sq[tc]); + mlx5e_activate_icosq(&c->icosq); + mlx5e_activate_icosq(&c->async_icosq); + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_activate_xsk(c); + else + mlx5e_activate_rq(&c->rq); +} + +static void mlx5e_deactivate_channel(struct mlx5e_channel *c) +{ + int tc; + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_deactivate_xsk(c); + else + mlx5e_deactivate_rq(&c->rq); + + mlx5e_deactivate_icosq(&c->async_icosq); + mlx5e_deactivate_icosq(&c->icosq); + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->sq[tc]); + mlx5e_qos_deactivate_queues(c); + + napi_disable(&c->napi); +} + +static void mlx5e_close_channel(struct mlx5e_channel *c) +{ + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_close_xsk(c); + mlx5e_close_queues(c); + mlx5e_qos_close_queues(c); + netif_napi_del(&c->napi); + + kvfree(c); +} + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) +{ + struct mlx5e_channel_param *cparam; + int err = -ENOMEM; + int i; + + chs->num = chs->params.num_channels; + + chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); + cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + if (!chs->c || !cparam) + goto err_free; + + err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); + if (err) + goto err_free; + + for (i = 0; i < chs->num; i++) { + struct xsk_buff_pool *xsk_pool = NULL; + + if (chs->params.xdp_prog) + xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); + + err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]); + if (err) + goto err_close_channels; + } + + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) { + err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); + if (err) + goto err_close_channels; + } + + if (priv->htb) { + err = mlx5e_qos_open_queues(priv, chs); + if (err) + goto err_close_ptp; + } + + mlx5e_health_channels_update(priv); + kvfree(cparam); + return 0; + +err_close_ptp: + if (chs->ptp) + mlx5e_ptp_close(chs->ptp); + +err_close_channels: + for (i--; i >= 0; i--) + mlx5e_close_channel(chs->c[i]); + +err_free: + kfree(chs->c); + kvfree(cparam); + chs->num = 0; + return err; +} + +static void mlx5e_activate_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_activate_channel(chs->c[i]); + + if (priv->htb) + mlx5e_qos_activate_queues(priv); + + for (i = 0; i < chs->num; i++) + mlx5e_trigger_napi_icosq(chs->c[i]); + + if (chs->ptp) + mlx5e_ptp_activate_channel(chs->ptp); +} + +static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) +{ + int err = 0; + int i; + + for (i = 0; i < chs->num; i++) { + int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; + struct mlx5e_channel *c = chs->c[i]; + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; + + err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout); + + /* Don't wait on the XSK RQ, because the newer xdpsock sample + * doesn't provide any Fill Ring entries at the setup stage. + */ + } + + return err ? -ETIMEDOUT : 0; +} + +static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) +{ + int i; + + if (chs->ptp) + mlx5e_ptp_deactivate_channel(chs->ptp); + + for (i = 0; i < chs->num; i++) + mlx5e_deactivate_channel(chs->c[i]); +} + +void mlx5e_close_channels(struct mlx5e_channels *chs) +{ + int i; + + ASSERT_RTNL(); + if (chs->ptp) { + mlx5e_ptp_close(chs->ptp); + chs->ptp = NULL; + } + for (i = 0; i < chs->num; i++) + mlx5e_close_channel(chs->c[i]); + + kfree(chs->c); + chs->num = 0; +} + +static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv) +{ + struct mlx5e_rx_res *res = priv->rx_res; + + return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge); +} + +static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge); + +static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 mtu) +{ + u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu); + int err; + + err = mlx5_set_port_mtu(mdev, hw_mtu, 1); + if (err) + return err; + + /* Update vport context MTU */ + mlx5_modify_nic_vport_mtu(mdev, hw_mtu); + return 0; +} + +static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u16 *mtu) +{ + u16 hw_mtu = 0; + int err; + + err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); + if (err || !hw_mtu) /* fallback to port oper mtu */ + mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + + *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); +} + +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params = &priv->channels.params; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 mtu; + int err; + + err = mlx5e_set_mtu(mdev, params, params->sw_mtu); + if (err) + return err; + + mlx5e_query_mtu(mdev, params, &mtu); + if (mtu != params->sw_mtu) + netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", + __func__, mtu, params->sw_mtu); + + params->sw_mtu = mtu; + return 0; +} + +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu); + +void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params = &priv->channels.params; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu), + ETH_MAX_MTU); +} + +static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc, + struct netdev_tc_txq *tc_to_txq) +{ + int tc, err; + + netdev_reset_tc(netdev); + + if (ntc == 1) + return 0; + + err = netdev_set_num_tc(netdev, ntc); + if (err) { + netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc); + return err; + } + + for (tc = 0; tc < ntc; tc++) { + u16 count, offset; + + count = tc_to_txq[tc].count; + offset = tc_to_txq[tc].offset; + netdev_set_tc_queue(netdev, tc, count, offset); + } + + return 0; +} + +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) +{ + int nch, ntc, num_txqs, err; + int qos_queues = 0; + + if (priv->htb) + qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb); + + nch = priv->channels.params.num_channels; + ntc = mlx5e_get_dcb_num_tc(&priv->channels.params); + num_txqs = nch * ntc + qos_queues; + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) + num_txqs += ntc; + + netdev_dbg(priv->netdev, "Setting num_txqs %d\n", num_txqs); + err = netif_set_real_num_tx_queues(priv->netdev, num_txqs); + if (err) + netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + + return err; +} + +static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) +{ + struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq; + struct net_device *netdev = priv->netdev; + int old_num_txqs, old_ntc; + int nch, ntc; + int err; + int i; + + old_num_txqs = netdev->real_num_tx_queues; + old_ntc = netdev->num_tc ? : 1; + for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++) + old_tc_to_txq[i] = netdev->tc_to_txq[i]; + + nch = priv->channels.params.num_channels; + ntc = priv->channels.params.mqprio.num_tc; + tc_to_txq = priv->channels.params.mqprio.tc_to_txq; + + err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq); + if (err) + goto err_out; + err = mlx5e_update_tx_netdev_queues(priv); + if (err) + goto err_tcs; + err = netif_set_real_num_rx_queues(netdev, nch); + if (err) { + netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); + goto err_txqs; + } + + return 0; + +err_txqs: + /* netif_set_real_num_rx_queues could fail only when nch increased. Only + * one of nch and ntc is changed in this function. That means, the call + * to netif_set_real_num_tx_queues below should not fail, because it + * decreases the number of TX queues. + */ + WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs)); + +err_tcs: + WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, + old_tc_to_txq)); +err_out: + return err; +} + +static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues); + +static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int num_comp_vectors, ix, irq; + + num_comp_vectors = mlx5_comp_vectors_max(mdev); + + for (ix = 0; ix < params->num_channels; ix++) { + cpumask_clear(priv->scratchpad.cpumask); + + for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = mlx5_comp_vector_get_cpu(mdev, irq); + + cpumask_set_cpu(cpu, priv->scratchpad.cpumask); + } + + netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix); + } +} + +static int mlx5e_num_channels_changed(struct mlx5e_priv *priv) +{ + u16 count = priv->channels.params.num_channels; + int err; + + err = mlx5e_update_netdev_queues(priv); + if (err) + return err; + + mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params); + + /* This function may be called on attach, before priv->rx_res is created. */ + if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res) + mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count); + + return 0; +} + +MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed); + +static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) +{ + int i, ch, tc, num_tc; + + ch = priv->channels.num; + num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params); + + for (i = 0; i < ch; i++) { + for (tc = 0; tc < num_tc; tc++) { + struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5e_txqsq *sq = &c->sq[tc]; + + priv->txq2sq[sq->txq_ix] = sq; + } + } + + if (!priv->channels.ptp) + goto out; + + if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) + goto out; + + for (tc = 0; tc < num_tc; tc++) { + struct mlx5e_ptp *c = priv->channels.ptp; + struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; + + priv->txq2sq[sq->txq_ix] = sq; + } + +out: + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); +} + +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) +{ + mlx5e_build_txq_maps(priv); + mlx5e_activate_channels(priv, &priv->channels); + mlx5e_xdp_tx_enable(priv); + + /* dev_watchdog() wants all TX queues to be started when the carrier is + * OK, including the ones in range real_num_tx_queues..num_tx_queues-1. + * Make it happy to avoid TX timeout false alarms. + */ + netif_tx_start_all_queues(priv->netdev); + + if (mlx5e_is_vport_rep(priv)) + mlx5e_rep_activate_channels(priv); + + set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + + mlx5e_wait_channels_min_rx_wqes(&priv->channels); + + if (priv->rx_res) + mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); +} + +static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv) +{ + WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)); + if (current_work() != &priv->tx_timeout_work) + cancel_work_sync(&priv->tx_timeout_work); +} + +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) +{ + if (priv->rx_res) + mlx5e_rx_res_channels_deactivate(priv->rx_res); + + clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + mlx5e_cancel_tx_timeout_work(priv); + + if (mlx5e_is_vport_rep(priv)) + mlx5e_rep_deactivate_channels(priv); + + /* The results of ndo_select_queue are unreliable, while netdev config + * is being changed (real_num_tx_queues, num_tc). Stop all queues to + * prevent ndo_start_xmit from being called, so that it can assume that + * the selected queue is always valid. + */ + netif_tx_disable(priv->netdev); + + mlx5e_xdp_tx_disable(priv); + mlx5e_deactivate_channels(&priv->channels); +} + +static int mlx5e_switch_priv_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params, + mlx5e_fp_preactivate preactivate, + void *context) +{ + struct mlx5e_params old_params; + + old_params = priv->channels.params; + priv->channels.params = *new_params; + + if (preactivate) { + int err; + + err = preactivate(priv, context); + if (err) { + priv->channels.params = old_params; + return err; + } + } + + return 0; +} + +static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_preactivate preactivate, + void *context) +{ + struct net_device *netdev = priv->netdev; + struct mlx5e_channels old_chs; + int carrier_ok; + int err = 0; + + carrier_ok = netif_carrier_ok(netdev); + netif_carrier_off(netdev); + + mlx5e_deactivate_priv_channels(priv); + + old_chs = priv->channels; + priv->channels = *new_chs; + + /* New channels are ready to roll, call the preactivate hook if needed + * to modify HW settings or update kernel parameters. + */ + if (preactivate) { + err = preactivate(priv, context); + if (err) { + priv->channels = old_chs; + goto out; + } + } + + mlx5e_close_channels(&old_chs); + priv->profile->update_rx(priv); + + mlx5e_selq_apply(&priv->selq); +out: + mlx5e_activate_priv_channels(priv); + + /* return carrier back if needed */ + if (carrier_ok) + netif_carrier_on(netdev); + + return err; +} + +int mlx5e_safe_switch_params(struct mlx5e_priv *priv, + struct mlx5e_params *params, + mlx5e_fp_preactivate preactivate, + void *context, bool reset) +{ + struct mlx5e_channels *new_chs; + int err; + + reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); + if (!reset) + return mlx5e_switch_priv_params(priv, params, preactivate, context); + + new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL); + if (!new_chs) + return -ENOMEM; + new_chs->params = *params; + + mlx5e_selq_prepare_params(&priv->selq, &new_chs->params); + + err = mlx5e_open_channels(priv, new_chs); + if (err) + goto err_cancel_selq; + + err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context); + if (err) + goto err_close; + + kfree(new_chs); + return 0; + +err_close: + mlx5e_close_channels(new_chs); + +err_cancel_selq: + mlx5e_selq_cancel(&priv->selq); + kfree(new_chs); + return err; +} + +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) +{ + return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true); +} + +void mlx5e_timestamp_init(struct mlx5e_priv *priv) +{ + priv->tstamp.tx_type = HWTSTAMP_TX_OFF; + priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; +} + +static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev, + enum mlx5_port_status state) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int vport_admin_state; + + mlx5_set_port_admin_status(mdev, state); + + if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_GEN(mdev, uplink_follow)) + return; + + if (state == MLX5_PORT_UP) + vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO; + else + vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN; + + mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state); +} + +int mlx5e_open_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params); + + set_bit(MLX5E_STATE_OPENED, &priv->state); + + err = mlx5e_open_channels(priv, &priv->channels); + if (err) + goto err_clear_state_opened_flag; + + err = priv->profile->update_rx(priv); + if (err) + goto err_close_channels; + + mlx5e_selq_apply(&priv->selq); + mlx5e_activate_priv_channels(priv); + mlx5e_apply_traps(priv, true); + if (priv->profile->update_carrier) + priv->profile->update_carrier(priv); + + mlx5e_queue_update_stats(priv); + return 0; + +err_close_channels: + mlx5e_close_channels(&priv->channels); +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &priv->state); + mlx5e_selq_cancel(&priv->selq); + return err; +} + +int mlx5e_open(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(netdev); + if (!err) + mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_close_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. + */ + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + mlx5e_apply_traps(priv, false); + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + + return 0; +} + +int mlx5e_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (!netif_device_present(netdev)) + return -ENODEV; + + mutex_lock(&priv->state_lock); + mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN); + err = mlx5e_close_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +static void mlx5e_free_drop_rq(struct mlx5e_rq *rq) +{ + mlx5_wq_destroy(&rq->wq_ctrl); +} + +static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + int err; + + param->wq.db_numa_node = param->wq.buf_numa_node; + + err = mlx5_wq_cyc_create(mdev, ¶m->wq, rqc_wq, &rq->wqe.wq, + &rq->wq_ctrl); + if (err) + return err; + + /* Mark as unused given "Drop-RQ" packets never reach XDP */ + xdp_rxq_info_unused(&rq->xdp_rxq); + + rq->mdev = mdev; + + return 0; +} + +static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv, + struct mlx5e_cq *cq, + struct mlx5e_cq_param *param) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + + return mlx5e_alloc_cq_common(priv, param, cq); +} + +int mlx5e_open_drop_rq(struct mlx5e_priv *priv, + struct mlx5e_rq *drop_rq) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_cq_param cq_param = {}; + struct mlx5e_rq_param rq_param = {}; + struct mlx5e_cq *cq = &drop_rq->cq; + int err; + + mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param); + + err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); + if (err) + return err; + + err = mlx5e_create_cq(cq, &cq_param); + if (err) + goto err_free_cq; + + err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param); + if (err) + goto err_destroy_cq; + + err = mlx5e_create_rq(drop_rq, &rq_param); + if (err) + goto err_free_rq; + + err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err); + + return 0; + +err_free_rq: + mlx5e_free_drop_rq(drop_rq); + +err_destroy_cq: + mlx5e_destroy_cq(cq); + +err_free_cq: + mlx5e_free_cq(cq); + + return err; +} + +void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) +{ + mlx5e_destroy_rq(drop_rq); + mlx5e_free_drop_rq(drop_rq); + mlx5e_destroy_cq(&drop_rq->cq); + mlx5e_free_cq(&drop_rq->cq); +} + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn) +{ + void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); + + if (MLX5_GET(tisc, tisc, tls_en)) + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn); + + if (mlx5_lag_is_lacp_owner(mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); + + return mlx5_core_create_tis(mdev, in, tisn); +} + +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) +{ + mlx5_core_destroy_tis(mdev, tisn); +} + +void mlx5e_destroy_tises(struct mlx5e_priv *priv) +{ + int tc, i; + + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]); +} + +static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1; +} + +int mlx5e_create_tises(struct mlx5e_priv *priv) +{ + int tc, i; + int err; + + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) { + for (tc = 0; tc < priv->profile->max_tc; tc++) { + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, prio, tc << 1); + + if (mlx5e_lag_should_assign_affinity(priv->mdev)) + MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1); + + err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]); + if (err) + goto err_close_tises; + } + } + + return 0; + +err_close_tises: + for (; i >= 0; i--) { + for (tc--; tc >= 0; tc--) + mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]); + tc = priv->profile->max_tc; + } + + return err; +} + +static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) +{ + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + priv->mqprio_rl = NULL; + } + mlx5e_accel_cleanup_tx(priv); + mlx5e_destroy_tises(priv); +} + +static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) +{ + int err; + int i; + + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd); + if (err) + return err; + } + if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) + return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); + + return 0; +} + +static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq, + int ntc, int nch) +{ + int tc; + + memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE); + + /* Map netdev TCs to offset 0. + * We have our own UP to TXQ mapping for DCB mode of QoS + */ + for (tc = 0; tc < ntc; tc++) { + tc_to_txq[tc] = (struct netdev_tc_txq) { + .count = nch, + .offset = 0, + }; + } +} + +static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq, + struct tc_mqprio_qopt *qopt) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + tc_to_txq[tc] = (struct netdev_tc_txq) { + .count = qopt->count[tc], + .offset = qopt->offset[tc], + }; + } +} + +static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc) +{ + params->mqprio.mode = TC_MQPRIO_MODE_DCB; + params->mqprio.num_tc = num_tc; + mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc, + params->num_channels); +} + +static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params, + struct mlx5e_mqprio_rl *rl) +{ + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + u32 hw_id = 0; + + if (rl) + mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id); + params->mqprio.channel.hw_id[tc] = hw_id; + } +} + +static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params, + struct tc_mqprio_qopt_offload *mqprio, + struct mlx5e_mqprio_rl *rl) +{ + int tc; + + params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL; + params->mqprio.num_tc = mqprio->qopt.num_tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc]; + + mlx5e_mqprio_rl_update_params(params, rl); + mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt); +} + +static void mlx5e_params_mqprio_reset(struct mlx5e_params *params) +{ + mlx5e_params_mqprio_dcb_set(params, 1); +} + +static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv, + struct tc_mqprio_qopt *mqprio) +{ + struct mlx5e_params new_params; + u8 tc = mqprio->num_tc; + int err; + + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + if (tc && tc != MLX5E_MAX_NUM_TC) + return -EINVAL; + + new_params = priv->channels.params; + mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1); + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); + + if (!err && priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + priv->mqprio_rl = NULL; + } + + priv->max_opened_tc = max_t(u8, priv->max_opened_tc, + mlx5e_get_dcb_num_tc(&priv->channels.params)); + return err; +} + +static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct net_device *netdev = priv->netdev; + struct mlx5e_ptp *ptp_channel; + int agg_count = 0; + int i; + + ptp_channel = priv->channels.ptp; + if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) { + netdev_err(netdev, + "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n"); + return -EINVAL; + } + + if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 || + mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC) + return -EINVAL; + + for (i = 0; i < mqprio->qopt.num_tc; i++) { + if (!mqprio->qopt.count[i]) { + netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i); + return -EINVAL; + } + if (mqprio->min_rate[i]) { + netdev_err(netdev, "Min tx rate is not supported\n"); + return -EINVAL; + } + + if (mqprio->max_rate[i]) { + int err; + + err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]); + if (err) + return err; + } + + if (mqprio->qopt.offset[i] != agg_count) { + netdev_err(netdev, "Discontinuous queues config is not supported\n"); + return -EINVAL; + } + agg_count += mqprio->qopt.count[i]; + } + + if (priv->channels.params.num_channels != agg_count) { + netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n", + agg_count, priv->channels.params.num_channels); + return -EINVAL; + } + + return 0; +} + +static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[]) +{ + int tc; + + for (tc = 0; tc < num_tc; tc++) + if (max_rate[tc]) + return true; + return false; +} + +static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev, + u8 num_tc, u64 max_rate[]) +{ + struct mlx5e_mqprio_rl *rl; + int err; + + if (!mlx5e_mqprio_rate_limit(num_tc, max_rate)) + return NULL; + + rl = mlx5e_mqprio_rl_alloc(); + if (!rl) + return ERR_PTR(-ENOMEM); + + err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate); + if (err) { + mlx5e_mqprio_rl_free(rl); + return ERR_PTR(err); + } + + return rl; +} + +static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + mlx5e_fp_preactivate preactivate; + struct mlx5e_params new_params; + struct mlx5e_mqprio_rl *rl; + bool nch_changed; + int err; + + err = mlx5e_mqprio_channel_validate(priv, mqprio); + if (err) + return err; + + rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate); + if (IS_ERR(rl)) + return PTR_ERR(rl); + + new_params = priv->channels.params; + mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl); + + nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1; + preactivate = nch_changed ? mlx5e_num_channels_changed_ctx : + mlx5e_update_netdev_queues_ctx; + err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true); + if (err) { + if (rl) { + mlx5e_mqprio_rl_cleanup(rl); + mlx5e_mqprio_rl_free(rl); + } + return err; + } + + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + } + priv->mqprio_rl = rl; + + return 0; +} + +static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + /* MQPRIO is another toplevel qdisc that can't be attached + * simultaneously with the offloaded HTB. + */ + if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq))) + return -EINVAL; + + switch (mqprio->mode) { + case TC_MQPRIO_MODE_DCB: + return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt); + case TC_MQPRIO_MODE_CHANNEL: + return mlx5e_setup_tc_mqprio_channel(priv, mqprio); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(mlx5e_block_cb_list); + +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + bool tc_unbind = false; + int err; + + if (type == TC_SETUP_BLOCK && + ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) + tc_unbind = true; + + if (!netif_device_present(dev) && !tc_unbind) + return -ENODEV; + + switch (type) { + case TC_SETUP_BLOCK: { + struct flow_block_offload *f = type_data; + + f->unlocked_driver_cb = true; + return flow_block_cb_setup_simple(type_data, + &mlx5e_block_cb_list, + mlx5e_setup_tc_block_cb, + priv, priv, true); + } + case TC_SETUP_QDISC_MQPRIO: + mutex_lock(&priv->state_lock); + err = mlx5e_setup_tc_mqprio(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; + case TC_SETUP_QDISC_HTB: + mutex_lock(&priv->state_lock); + err = mlx5e_htb_setup_tc(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; + default: + return -EOPNOTSUPP; + } +} + +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) +{ + int i; + + for (i = 0; i < priv->stats_nch; i++) { + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; + struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; + int j; + + s->rx_packets += rq_stats->packets + xskrq_stats->packets; + s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; + s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } + if (priv->rx_ptp_opened) { + struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->multicast += rq_stats->mcast_packets; + } +} + +void +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + + if (!netif_device_present(dev)) + return; + + /* In switchdev mode, monitor counters doesn't monitor + * rx/tx stats of 802_3. The update stats mechanism + * should keep the 802_3 layout counters updated + */ + if (!mlx5e_monitor_counter_supported(priv) || + mlx5e_is_uplink_rep(priv)) { + /* update HW stats in background for next time */ + mlx5e_queue_update_stats(priv); + } + + if (mlx5e_is_uplink_rep(priv)) { + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + + stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); + stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); + stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); + stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); + } else { + mlx5e_fold_sw_stats64(priv, stats); + } + + stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; + + stats->rx_length_errors = + PPORT_802_3_GET(pstats, a_in_range_length_errors) + + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + + PPORT_802_3_GET(pstats, a_frame_too_long_errors) + + VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small); + stats->rx_crc_errors = + PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); + stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); + stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + + stats->rx_frame_errors; + stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; +} + +static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) +{ + if (mlx5e_is_uplink_rep(priv)) + return; /* no rx mode for uplink rep */ + + queue_work(priv->wq, &priv->set_rx_mode_work); +} + +static void mlx5e_set_rx_mode(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_nic_set_rx_mode(priv); +} + +static int mlx5e_set_mac(struct net_device *netdev, void *addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + netif_addr_lock_bh(netdev); + eth_hw_addr_set(netdev, saddr->sa_data); + netif_addr_unlock_bh(netdev); + + mlx5e_nic_set_rx_mode(priv); + + return 0; +} + +#define MLX5E_SET_FEATURE(features, feature, enable) \ + do { \ + if (enable) \ + *features |= feature; \ + else \ + *features &= ~feature; \ + } while (0) + +typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); + +static int set_feature_lro(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params *cur_params; + struct mlx5e_params new_params; + bool reset = true; + int err = 0; + + mutex_lock(&priv->state_lock); + + cur_params = &priv->channels.params; + new_params = *cur_params; + + if (enable) + new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO; + else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO) + new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; + else + goto out; + + if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO && + new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) { + if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL)) + reset = false; + } + } + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_modify_tirs_packet_merge_ctx, NULL, reset); +out: + mutex_unlock(&priv->state_lock); + return err; +} + +static int set_feature_hw_gro(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; + bool reset = true; + int err = 0; + + mutex_lock(&priv->state_lock); + new_params = priv->channels.params; + + if (enable) { + new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; + new_params.packet_merge.shampo.match_criteria_type = + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; + new_params.packet_merge.shampo.alignment_granularity = + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE; + } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; + } else { + goto out; + } + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); +out: + mutex_unlock(&priv->state_lock); + return err; +} + +static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (enable) + mlx5e_enable_cvlan_filter(priv->fs, + !!(priv->netdev->flags & IFF_PROMISC)); + else + mlx5e_disable_cvlan_filter(priv->fs, + !!(priv->netdev->flags & IFF_PROMISC)); + + return 0; +} + +static int set_feature_hw_tc(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) : + MLX5_TC_FLAG(NIC_OFFLOAD); + if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) { + netdev_err(netdev, + "Active offloaded tc filters, can't turn hw_tc_offload off\n"); + return -EINVAL; + } +#endif + + mutex_lock(&priv->state_lock); + if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) { + netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); + err = -EINVAL; + } + mutex_unlock(&priv->state_lock); + + return err; +} + +static int set_feature_rx_all(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_set_port_fcs(mdev, !enable); +} + +static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; + bool supported, curr_state; + int err; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return 0; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + + supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap); + curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc); + + if (!supported || enable == curr_state) + return 0; + + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx) +{ + struct mlx5_core_dev *mdev = priv->mdev; + bool enable = *(bool *)ctx; + + return mlx5e_set_rx_port_ts(mdev, enable); +} + +static int set_feature_rx_fcs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5e_params new_params; + int err; + bool rx_ts_over_crc = !enable; + + mutex_lock(&priv->state_lock); + + new_params = chs->params; + new_params.scatter_fcs_en = enable; + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, + &rx_ts_over_crc, true); + mutex_unlock(&priv->state_lock); + return err; +} + +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + mutex_lock(&priv->state_lock); + + mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable); + priv->channels.params.vlan_strip_disable = !enable; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_modify_channels_vsd(&priv->channels, !enable); + if (err) { + mlx5e_fs_set_vlan_strip_disable(priv->fs, enable); + priv->channels.params.vlan_strip_disable = enable; + } +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_flow_steering *fs = priv->fs; + + if (mlx5e_is_uplink_rep(priv)) + return 0; /* no vlan table for uplink rep */ + + return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid); +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_flow_steering *fs = priv->fs; + + if (mlx5e_is_uplink_rep(priv)) + return 0; /* no vlan table for uplink rep */ + + return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid); +} + +#ifdef CONFIG_MLX5_EN_ARFS +static int set_feature_arfs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (enable) + err = mlx5e_arfs_enable(priv->fs); + else + err = mlx5e_arfs_disable(priv->fs); + + return err; +} +#endif + +static int mlx5e_handle_feature(struct net_device *netdev, + netdev_features_t *features, + netdev_features_t feature, + mlx5e_feature_handler feature_handler) +{ + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); + int err; + + if (!(changes & feature)) + return 0; + + err = feature_handler(netdev, enable); + if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); + netdev_err(netdev, "%s feature %pNF failed, err %d\n", + enable ? "Enable" : "Disable", &feature, err); + return err; + } + + return 0; +} + +void mlx5e_set_xdp_feature(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params *params = &priv->channels.params; + xdp_features_t val; + + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + xdp_clear_features_flag(netdev); + return; + } + + val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; + xdp_set_features_flag(netdev, val); +} + +int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) +{ + netdev_features_t oper_features = features; + int err = 0; + +#define MLX5E_HANDLE_FEATURE(feature, handler) \ + mlx5e_handle_feature(netdev, &oper_features, feature, handler) + + err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, + set_feature_cvlan_filter); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); +#ifdef CONFIG_MLX5_EN_ARFS + err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs); +#endif + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx); + + if (err) { + netdev->features = oper_features; + return -EINVAL; + } + + /* update XDP supported features */ + mlx5e_set_xdp_feature(netdev); + + return 0; +} + +static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, + netdev_features_t features) +{ + features &= ~NETIF_F_HW_TLS_RX; + if (netdev->features & NETIF_F_HW_TLS_RX) + netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_TLS_TX; + if (netdev->features & NETIF_F_HW_TLS_TX) + netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_NTUPLE; + if (netdev->features & NETIF_F_NTUPLE) + netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + + features &= ~NETIF_F_GRO_HW; + if (netdev->features & NETIF_F_GRO_HW) + netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); + + return features; +} + +static netdev_features_t mlx5e_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_vlan_table *vlan; + struct mlx5e_params *params; + + if (!netif_device_present(netdev)) + return features; + + vlan = mlx5e_fs_get_vlan(priv->fs); + mutex_lock(&priv->state_lock); + params = &priv->channels.params; + if (!vlan || + !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) { + /* HW strips the outer C-tag header, this is a problem + * for S-tag traffic. + */ + features &= ~NETIF_F_HW_VLAN_CTAG_RX; + if (!params->vlan_strip_disable) + netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); + } + + if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_GRO_HW; + } + } + + if (params->xdp_prog) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with XDP\n"); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "HW GRO is incompatible with XDP\n"); + features &= ~NETIF_F_GRO_HW; + } + } + + if (priv->xsk.refcnt) { + if (features & NETIF_F_LRO) { + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n", + priv->xsk.refcnt); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n", + priv->xsk.refcnt); + features &= ~NETIF_F_GRO_HW; + } + } + + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + features &= ~NETIF_F_RXHASH; + if (netdev->features & NETIF_F_RXHASH) + netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); + + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n"); + features &= ~NETIF_F_GRO_HW; + } + } + + if (mlx5e_is_uplink_rep(priv)) { + features = mlx5e_fix_uplink_rep_features(netdev, features); + features |= NETIF_F_NETNS_LOCAL; + } else { + features &= ~NETIF_F_NETNS_LOCAL; + } + + mutex_unlock(&priv->state_lock); + + return features; +} + +static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + struct mlx5e_channels *chs, + struct mlx5e_params *new_params, + struct mlx5_core_dev *mdev) +{ + u16 ix; + + for (ix = 0; ix < chs->params.num_channels; ix++) { + struct xsk_buff_pool *xsk_pool = + mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); + struct mlx5e_xsk_param xsk; + int max_xdp_mtu; + + if (!xsk_pool) + continue; + + mlx5e_build_xsk_param(xsk_pool, &xsk); + max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk); + + /* Validate XSK params and XDP MTU in advance */ + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) || + new_params->sw_mtu > max_xdp_mtu) { + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + int max_mtu_frame, max_mtu_page, max_mtu; + + /* Two criteria must be met: + * 1. HW MTU + all headrooms <= XSK frame size. + * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. + */ + max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); + max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); + max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu); + + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n", + new_params->sw_mtu, ix, max_mtu); + return false; + } + } + + return true; +} + +static bool mlx5e_params_validate_xdp(struct net_device *netdev, + struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + bool is_linear; + + /* No XSK params: AF_XDP can't be enabled yet at the point of setting + * the XDP program. + */ + is_linear = params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC ? + mlx5e_rx_is_linear_skb(mdev, params, NULL) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL); + + if (!is_linear) { + if (!params->xdp_prog->aux->xdp_has_frags) { + netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !mlx5e_verify_params_rx_mpwqe_strides(mdev, params, NULL)) { + netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n", + params->sw_mtu, + mlx5e_xdp_max_mtu(params, NULL)); + return false; + } + } + + return true; +} + +int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, + mlx5e_fp_preactivate preactivate) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; + struct mlx5e_params *params; + bool reset = true; + int err = 0; + + mutex_lock(&priv->state_lock); + + params = &priv->channels.params; + + new_params = *params; + new_params.sw_mtu = new_mtu; + err = mlx5e_validate_params(priv->mdev, &new_params); + if (err) + goto out; + + if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev, + &new_params)) { + err = -EINVAL; + goto out; + } + + if (priv->xsk.refcnt && + !mlx5e_xsk_validate_mtu(netdev, &priv->channels, + &new_params, priv->mdev)) { + err = -EINVAL; + goto out; + } + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO) + reset = false; + + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) { + bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL); + bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_params, NULL); + u8 sz_old = mlx5e_mpwqe_get_log_rq_size(priv->mdev, params, NULL); + u8 sz_new = mlx5e_mpwqe_get_log_rq_size(priv->mdev, &new_params, NULL); + + /* Always reset in linear mode - hw_mtu is used in data path. + * Check that the mode was non-linear and didn't change. + * If XSK is active, XSK RQs are linear. + * Reset if the RQ size changed, even if it's non-linear. + */ + if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt && + sz_old == sz_new) + reset = false; + } + + err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset); + +out: + netdev->mtu = params->sw_mtu; + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); +} + +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) +{ + bool set = *(bool *)ctx; + + return mlx5e_ptp_rx_manage_fs(priv, set); +} + +static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter) +{ + bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + int err; + + if (!rx_filter) + /* Reset CQE compression to Admin default */ + return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false); + + if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + return 0; + + /* Disable CQE compression */ + netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true); + if (err) + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); + + return err; +} + +static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx) +{ + struct mlx5e_params new_params; + + if (ptp_rx == priv->channels.params.ptp_rx) + return 0; + + new_params = priv->channels.params; + new_params.ptp_rx = ptp_rx; + return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); +} + +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config config; + bool rx_cqe_compress_def; + bool ptp_rx; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || + (mlx5_clock_get_ptp_index(priv->mdev) == -1)) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + ptp_rx = false; + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + config.rx_filter = HWTSTAMP_FILTER_ALL; + /* ptp_rx is set if both HW TS is set and CQE + * compression is set + */ + ptp_rx = rx_cqe_compress_def; + break; + default: + err = -ERANGE; + goto err_unlock; + } + + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) + err = mlx5e_hwstamp_config_no_ptp_rx(priv, + config.rx_filter != HWTSTAMP_FILTER_NONE); + else + err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx); + if (err) + goto err_unlock; + + memcpy(&priv->tstamp, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); + + /* might need to fix some features */ + netdev_update_features(priv->netdev); + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +err_unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config *cfg = &priv->tstamp; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; +} + +static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(priv, ifr); + case SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(priv, ifr); + default: + return -EOPNOTSUPP; + } +} + +#ifdef CONFIG_MLX5_ESWITCH +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); +} + +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, + vlan, qos); +} + +static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting); +} + +static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); +} + +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, + max_tx_rate, min_tx_rate); +} + +static int mlx5_vport_link2ifla(u8 esw_link) +{ + switch (esw_link) { + case MLX5_VPORT_ADMIN_STATE_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_VPORT_ADMIN_STATE_UP: + return IFLA_VF_LINK_STATE_ENABLE; + } + return IFLA_VF_LINK_STATE_AUTO; +} + +static int mlx5_ifla_link2vport(u8 ifla_link) +{ + switch (ifla_link) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_VPORT_ADMIN_STATE_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_VPORT_ADMIN_STATE_UP; + } + return MLX5_VPORT_ADMIN_STATE_AUTO; +} + +static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, + int link_state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, + mlx5_ifla_link2vport(link_state)); +} + +int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!netif_device_present(dev)) + return -EOPNOTSUPP; + + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); + if (err) + return err; + ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); + return 0; +} + +int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, + vf_stats); +} + +static bool +mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!netif_device_present(dev)) + return false; + + if (!mlx5e_is_uplink_rep(priv)) + return false; + + return mlx5e_rep_has_offload_stats(dev, attr_id); +} + +static int +mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5e_rep_get_offload_stats(attr_id, dev, sp); +} +#endif + +static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || + MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_tx)); + default: + return false; + } +} + +static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev, + struct sk_buff *skb) +{ + switch (skb->inner_protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + case htons(ETH_P_TEB): + return true; + case htons(ETH_P_MPLS_UC): + case htons(ETH_P_MPLS_MC): + return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre); + } + return false; +} + +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) +{ + unsigned int offset = 0; + struct udphdr *udph; + u8 proto; + u16 port; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); + break; + default: + goto out; + } + + switch (proto) { + case IPPROTO_GRE: + if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb)) + return features; + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP)) + return features; + break; + case IPPROTO_UDP: + udph = udp_hdr(skb); + port = be16_to_cpu(udph->dest); + + /* Verify if UDP port is being offloaded by HW */ + if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) + return features; + +#if IS_ENABLED(CONFIG_GENEVE) + /* Support Geneve offload for default UDP port */ + if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev)) + return features; +#endif + break; +#ifdef CONFIG_MLX5_EN_IPSEC + case IPPROTO_ESP: + return mlx5e_ipsec_feature_check(skb, features); +#endif + } + +out: + /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + /* Validate if the tunneled packet is being offloaded by HW */ + if (skb->encapsulation && + (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) + return mlx5e_tunnel_features_check(priv, skb, features); + + return features; +} + +static void mlx5e_tx_timeout_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + struct net_device *netdev = priv->netdev; + int i; + + /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues + * through this flow. However, channel closing flows have to wait for + * this work to finish while holding rtnl lock too. So either get the + * lock or find that channels are being closed for other reason and + * this work is not relevant anymore. + */ + while (!rtnl_trylock()) { + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) + return; + msleep(20); + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + struct netdev_queue *dev_queue = + netdev_get_tx_queue(netdev, i); + struct mlx5e_txqsq *sq = priv->txq2sq[i]; + + if (!netif_xmit_stopped(dev_queue)) + continue; + + if (mlx5e_reporter_tx_timeout(sq)) + /* break if tried to reopened channels */ + break; + } + +unlock: + rtnl_unlock(); +} + +static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + netdev_err(dev, "TX timeout detected\n"); + queue_work(priv->wq, &priv->tx_timeout_work); +} + +static int mlx5e_xdp_allowed(struct net_device *netdev, struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n"); + return -EINVAL; + } + + if (!mlx5e_params_validate_xdp(netdev, mdev, params)) + return -EINVAL; + + return 0; +} + +static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + + old_prog = rcu_replace_pointer(rq->xdp_prog, prog, + lockdep_is_held(&rq->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); +} + +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; + struct bpf_prog *old_prog; + int err = 0; + bool reset; + int i; + + mutex_lock(&priv->state_lock); + + new_params = priv->channels.params; + new_params.xdp_prog = prog; + + if (prog) { + err = mlx5e_xdp_allowed(netdev, priv->mdev, &new_params); + if (err) + goto unlock; + } + + /* no need for full reset when exchanging programs */ + reset = (!priv->channels.params.xdp_prog || !prog); + + old_prog = priv->channels.params.xdp_prog; + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); + if (err) + goto unlock; + + if (old_prog) + bpf_prog_put(old_prog); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. + */ + bpf_prog_add(prog, priv->channels.num); + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + mlx5e_rq_replace_xdp_prog(&c->rq, prog); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) { + bpf_prog_inc(prog); + mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); + } + } + +unlock: + mutex_unlock(&priv->state_lock); + + /* Need to fix some features. */ + if (!err) + netdev_update_features(netdev); + + return err; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_SETUP_XSK_POOL: + return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool, + xdp->xsk.queue_id); + default: + return -EINVAL; + } +} + +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mode, setting; + int err; + + err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); + if (err) + return err; + mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, + mode, + 0, 0, nlflags, filter_mask, NULL); +} + +static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags, struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct nlattr *attr, *br_spec; + u16 mode = BRIDGE_MODE_UNDEF; + u8 setting; + int rem; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; + + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + mode = nla_get_u16(attr); + if (mode > BRIDGE_MODE_VEPA) + return -EINVAL; + + break; + } + + if (mode == BRIDGE_MODE_UNDEF) + return -EINVAL; + + setting = (mode == BRIDGE_MODE_VEPA) ? 1 : 0; + return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting); +} +#endif + +const struct net_device_ops mlx5e_netdev_ops = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_setup_tc = mlx5e_setup_tc, + .ndo_select_queue = mlx5e_select_queue, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_set_rx_mode = mlx5e_set_rx_mode, + .ndo_set_mac_address = mlx5e_set_mac, + .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, + .ndo_set_features = mlx5e_set_features, + .ndo_fix_features = mlx5e_fix_features, + .ndo_change_mtu = mlx5e_change_nic_mtu, + .ndo_eth_ioctl = mlx5e_ioctl, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, + .ndo_features_check = mlx5e_features_check, + .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_bpf = mlx5e_xdp, + .ndo_xdp_xmit = mlx5e_xdp_xmit, + .ndo_xsk_wakeup = mlx5e_xsk_wakeup, +#ifdef CONFIG_MLX5_EN_ARFS + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, +#endif +#ifdef CONFIG_MLX5_ESWITCH + .ndo_bridge_setlink = mlx5e_bridge_setlink, + .ndo_bridge_getlink = mlx5e_bridge_getlink, + + /* SRIOV E-Switch NDOs */ + .ndo_set_vf_mac = mlx5e_set_vf_mac, + .ndo_set_vf_vlan = mlx5e_set_vf_vlan, + .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, + .ndo_set_vf_trust = mlx5e_set_vf_trust, + .ndo_set_vf_rate = mlx5e_set_vf_rate, + .ndo_get_vf_config = mlx5e_get_vf_config, + .ndo_set_vf_link_state = mlx5e_set_vf_link_state, + .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, +#endif +}; + +static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5_core_dev *mdev = priv->mdev; + u8 rx_cq_period_mode; + + params->sw_mtu = mtu; + params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, + priv->max_nch); + mlx5e_params_mqprio_reset(params); + + /* SQ */ + params->log_sq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); + + /* XDP SQ */ + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); + + /* set CQE compression */ + params->rx_cqe_compress_def = false; + if (MLX5_CAP_GEN(mdev, cqe_compression) && + MLX5_CAP_GEN(mdev, vport_group_manager)) + params->rx_cqe_compress_def = slow_pci_heuristic(mdev); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false); + + /* RQ */ + mlx5e_build_rq_params(mdev, params); + + params->terminate_lkey_be = mlx5_core_get_terminate_scatter_list_mkey(mdev); + + params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + + /* CQ moderation params */ + rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); + mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + + /* TX inline */ + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); + + /* AF_XDP */ + params->xsk = xsk; + + /* Do not update netdev->features directly in here + * on mlx5e_attach_netdev() we will call mlx5e_update_features() + * To update netdev->features please modify mlx5e_fix_features() + */ +} + +static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u8 addr[ETH_ALEN]; + + mlx5_query_mac_address(priv->mdev, addr); + if (is_zero_ether_addr(addr) && + !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { + eth_hw_addr_random(netdev); + mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr); + return; + } + + eth_hw_addr_set(netdev, addr); +} + +static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port)); +} + +static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port)); +} + +void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv) +{ + if (!mlx5_vxlan_allowed(priv->mdev->vxlan)) + return; + + priv->nic_info.set_port = mlx5e_vxlan_set_port; + priv->nic_info.unset_port = mlx5e_vxlan_unset_port; + priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | + UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; + priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; + /* Don't count the space hard-coded to the IANA port */ + priv->nic_info.tables[0].n_entries = + mlx5_vxlan_max_udp_ports(priv->mdev) - 1; + + priv->netdev->udp_tunnel_nic_info = &priv->nic_info; +} + +static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev) +{ + int tt; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt))) + return true; + } + return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)); +} + +static void mlx5e_build_nic_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + bool fcs_supported; + bool fcs_enabled; + + SET_NETDEV_DEV(netdev, mdev->device); + + netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; + + mlx5e_dcbnl_build_netdev(netdev); + + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_ethtool_ops; + + netdev->vlan_features |= NETIF_F_SG; + netdev->vlan_features |= NETIF_F_HW_CSUM; + netdev->vlan_features |= NETIF_F_HW_MACSEC; + netdev->vlan_features |= NETIF_F_GRO; + netdev->vlan_features |= NETIF_F_TSO; + netdev->vlan_features |= NETIF_F_TSO6; + netdev->vlan_features |= NETIF_F_RXCSUM; + netdev->vlan_features |= NETIF_F_RXHASH; + netdev->vlan_features |= NETIF_F_GSO_PARTIAL; + + netdev->mpls_features |= NETIF_F_SG; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->mpls_features |= NETIF_F_TSO; + netdev->mpls_features |= NETIF_F_TSO6; + + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; + + /* Tunneled LRO is not supported in the driver, and the same RQs are + * shared between inner and outer TIRs, so the driver can't disable LRO + * for inner TIRs while having it enabled for outer TIRs. Due to this, + * block LRO altogether if the firmware declares tunneled LRO support. + */ + if (!!MLX5_CAP_ETH(mdev, lro_cap) && + !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) && + !MLX5_CAP_ETH(mdev, tunnel_lro_gre) && + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + netdev->vlan_features |= NETIF_F_LRO; + + netdev->hw_features = netdev->vlan_features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + + if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { + netdev->hw_enc_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= NETIF_F_TSO; + netdev->hw_enc_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; + } + + if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + + if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + } + + if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { + netdev->hw_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6; + } + + netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; + netdev->hw_features |= NETIF_F_GSO_UDP_L4; + netdev->features |= NETIF_F_GSO_UDP_L4; + + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); + + if (fcs_supported) + netdev->hw_features |= NETIF_F_RXALL; + + if (MLX5_CAP_ETH(mdev, scatter_fcs)) + netdev->hw_features |= NETIF_F_RXFCS; + + if (mlx5_qos_is_supported(mdev)) + netdev->hw_features |= NETIF_F_HW_TC; + + netdev->features = netdev->hw_features; + + /* Defaults */ + if (fcs_enabled) + netdev->features &= ~NETIF_F_RXALL; + netdev->features &= ~NETIF_F_LRO; + netdev->features &= ~NETIF_F_GRO_HW; + netdev->features &= ~NETIF_F_RXFCS; + +#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) + if (FT_CAP(flow_modify_en) && + FT_CAP(modify_root) && + FT_CAP(identified_miss_table_mode) && + FT_CAP(flow_table_modify)) { +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + netdev->hw_features |= NETIF_F_HW_TC; +#endif +#ifdef CONFIG_MLX5_EN_ARFS + netdev->hw_features |= NETIF_F_NTUPLE; +#endif + } + + netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; + + netdev->priv_flags |= IFF_UNICAST_FLT; + + netif_set_tso_max_size(netdev, GSO_MAX_SIZE); + mlx5e_set_xdp_feature(netdev); + mlx5e_set_netdev_dev_addr(netdev); + mlx5e_macsec_build_netdev(priv); + mlx5e_ipsec_build_netdev(priv); + mlx5e_ktls_build_netdev(priv); +} + +void mlx5e_create_q_counters(struct mlx5e_priv *priv) +{ + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out); + if (!err) + priv->q_counter = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + + err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out); + if (!err) + priv->drop_rq_q_counter = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); +} + +void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + if (priv->q_counter) { + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, + priv->q_counter); + mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in); + } + + if (priv->drop_rq_q_counter) { + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, + priv->drop_rq_q_counter); + mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in); + } +} + +static int mlx5e_nic_init(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_flow_steering *fs; + int err; + + mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); + mlx5e_vxlan_set_netdev_info(priv); + + mlx5e_timestamp_init(priv); + + priv->dfs_root = debugfs_create_dir("nic", + mlx5_debugfs_get_dev_root(mdev)); + + fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); + if (!fs) { + err = -ENOMEM; + mlx5_core_err(mdev, "FS initialization failed, %d\n", err); + debugfs_remove_recursive(priv->dfs_root); + return err; + } + priv->fs = fs; + + err = mlx5e_ktls_init(priv); + if (err) + mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); + + mlx5e_health_create_reporters(priv); + + /* If netdev is already registered (e.g. move from uplink to nic profile), + * RTNL lock must be held before triggering netdev notifiers. + */ + if (take_rtnl) + rtnl_lock(); + + /* update XDP supported features */ + mlx5e_set_xdp_feature(netdev); + + if (take_rtnl) + rtnl_unlock(); + + return 0; +} + +static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_health_destroy_reporters(priv); + mlx5e_ktls_cleanup(priv); + mlx5e_fs_cleanup(priv->fs); + debugfs_remove_recursive(priv->dfs_root); + priv->fs = NULL; +} + +static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + enum mlx5e_rx_res_features features; + int err; + + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) + return -ENOMEM; + + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_q_counters; + } + + features = MLX5E_RX_RES_FEATURE_PTP; + if (mlx5_tunnel_inner_ft_supported(mdev)) + features |= MLX5E_RX_RES_FEATURE_INNER_FT; + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, + priv->channels.params.num_channels); + if (err) + goto err_close_drop_rq; + + err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile, + priv->netdev); + if (err) { + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); + goto err_destroy_rx_res; + } + + err = mlx5e_tc_nic_init(priv); + if (err) + goto err_destroy_flow_steering; + + err = mlx5e_accel_init_rx(priv); + if (err) + goto err_tc_nic_cleanup; + +#ifdef CONFIG_MLX5_EN_ARFS + priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev); +#endif + + return 0; + +err_tc_nic_cleanup: + mlx5e_tc_nic_cleanup(priv); +err_destroy_flow_steering: + mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE), + priv->profile); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; + return err; +} + +static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) +{ + mlx5e_accel_cleanup_rx(priv); + mlx5e_tc_nic_cleanup(priv); + mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE), + priv->profile); + mlx5e_rx_res_destroy(priv->rx_res); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; +} + +static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv) +{ + struct mlx5e_params *params; + struct mlx5e_mqprio_rl *rl; + + params = &priv->channels.params; + if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) + return; + + rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc, + params->mqprio.channel.max_rate); + if (IS_ERR(rl)) + rl = NULL; + priv->mqprio_rl = rl; + mlx5e_mqprio_rl_update_params(params, rl); +} + +static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + + err = mlx5e_accel_init_tx(priv); + if (err) + goto err_destroy_tises; + + mlx5e_set_mqprio_rl(priv); + mlx5e_dcbnl_initialize(priv); + return 0; + +err_destroy_tises: + mlx5e_destroy_tises(priv); + return err; +} + +static void mlx5e_nic_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + mlx5e_fs_init_l2_addr(priv->fs, netdev); + mlx5e_ipsec_init(priv); + + err = mlx5e_macsec_init(priv); + if (err) + mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err); + + /* Marking the link as currently not needed by the Driver */ + if (!netif_running(netdev)) + mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN); + + mlx5e_set_netdev_mtu_boundaries(priv); + mlx5e_set_dev_port_mtu(priv); + + mlx5_lag_add_netdev(mdev, netdev); + + mlx5e_enable_async_events(priv); + mlx5e_enable_blocking_events(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_init(priv); + + mlx5e_hv_vhca_stats_create(priv); + if (netdev->reg_state != NETREG_REGISTERED) + return; + mlx5e_dcbnl_init_app(priv); + + mlx5e_nic_set_rx_mode(priv); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + udp_tunnel_nic_reset_ntf(priv->netdev); + netif_device_attach(netdev); + rtnl_unlock(); +} + +static void mlx5e_nic_disable(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (priv->netdev->reg_state == NETREG_REGISTERED) + mlx5e_dcbnl_delete_app(priv); + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + + mlx5e_nic_set_rx_mode(priv); + + mlx5e_hv_vhca_stats_destroy(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_cleanup(priv); + + mlx5e_disable_blocking_events(priv); + if (priv->en_trap) { + mlx5e_deactivate_trap(priv); + mlx5e_close_trap(priv->en_trap); + priv->en_trap = NULL; + } + mlx5e_disable_async_events(priv); + mlx5_lag_remove_netdev(mdev, priv->netdev); + mlx5_vxlan_reset_to_default(mdev->vxlan); + mlx5e_macsec_cleanup(priv); + mlx5e_ipsec_cleanup(priv); +} + +int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, false, false); +} + +static const struct mlx5e_profile mlx5e_nic_profile = { + .init = mlx5e_nic_init, + .cleanup = mlx5e_nic_cleanup, + .init_rx = mlx5e_init_nic_rx, + .cleanup_rx = mlx5e_cleanup_nic_rx, + .init_tx = mlx5e_init_nic_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .enable = mlx5e_nic_enable, + .disable = mlx5e_nic_disable, + .update_rx = mlx5e_update_nic_rx, + .update_stats = mlx5e_stats_update_ndo_stats, + .update_carrier = mlx5e_update_carrier, + .rx_handlers = &mlx5e_rx_handlers_nic, + .max_tc = MLX5E_MAX_NUM_TC, + .stats_grps = mlx5e_nic_stats_grps, + .stats_grps_num = mlx5e_nic_stats_grps_num, + .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) | + BIT(MLX5E_PROFILE_FEATURE_PTP_TX) | + BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) | + BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) | + BIT(MLX5E_PROFILE_FEATURE_FS_TC), +}; + +static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + int nch; + + nch = mlx5e_get_max_num_channels(mdev); + + if (profile->max_nch_limit) + nch = min_t(int, nch, profile->max_nch_limit(mdev)); + return nch; +} + +static unsigned int +mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, + const struct mlx5e_profile *profile) + +{ + unsigned int max_nch, tmp; + + /* core resources */ + max_nch = mlx5e_profile_max_num_channels(mdev, profile); + + /* netdev rx queues */ + max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues); + + /* netdev tx queues */ + tmp = netdev->num_tx_queues; + if (mlx5_qos_is_supported(mdev)) + tmp -= mlx5e_qos_max_leaf_nodes(mdev); + if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) + tmp -= profile->max_tc; + tmp = tmp / profile->max_tc; + max_nch = min_t(unsigned int, max_nch, tmp); + + return max_nch; +} + +int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev) +{ + /* Indirect TIRS: 2 sets of TTCs (inner + outer steering) + * and 1 set of direct TIRS + */ + return 2 * MLX5E_NUM_INDIR_TIRS + + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile); +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev); +} + +/* mlx5e generic netdev management API (move to en_common.c) */ +int mlx5e_priv_init(struct mlx5e_priv *priv, + const struct mlx5e_profile *profile, + struct net_device *netdev, + struct mlx5_core_dev *mdev) +{ + int nch, num_txqs, node; + int err; + + num_txqs = netdev->num_tx_queues; + nch = mlx5e_calc_max_nch(mdev, netdev, profile); + node = dev_to_node(mlx5_core_dma_dev(mdev)); + + /* priv init */ + priv->mdev = mdev; + priv->netdev = netdev; + priv->max_nch = nch; + priv->max_opened_tc = 1; + + if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) + return -ENOMEM; + + mutex_init(&priv->state_lock); + + err = mlx5e_selq_init(&priv->selq, &priv->state_lock); + if (err) + goto err_free_cpumask; + + INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); + INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); + INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + priv->wq = create_singlethread_workqueue("mlx5e"); + if (!priv->wq) + goto err_free_selq; + + priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); + if (!priv->txq2sq) + goto err_destroy_workqueue; + + priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node); + if (!priv->tx_rates) + goto err_free_txq2sq; + + priv->channel_stats = + kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); + if (!priv->channel_stats) + goto err_free_tx_rates; + + return 0; + +err_free_tx_rates: + kfree(priv->tx_rates); +err_free_txq2sq: + kfree(priv->txq2sq); +err_destroy_workqueue: + destroy_workqueue(priv->wq); +err_free_selq: + mlx5e_selq_cleanup(&priv->selq); +err_free_cpumask: + free_cpumask_var(priv->scratchpad.cpumask); + return -ENOMEM; +} + +void mlx5e_priv_cleanup(struct mlx5e_priv *priv) +{ + int i; + + /* bail if change profile failed and also rollback failed */ + if (!priv->mdev) + return; + + for (i = 0; i < priv->stats_nch; i++) + kvfree(priv->channel_stats[i]); + kfree(priv->channel_stats); + kfree(priv->tx_rates); + kfree(priv->txq2sq); + destroy_workqueue(priv->wq); + mutex_lock(&priv->state_lock); + mlx5e_selq_cleanup(&priv->selq); + mutex_unlock(&priv->state_lock); + free_cpumask_var(priv->scratchpad.cpumask); + + for (i = 0; i < priv->htb_max_qos_sqs; i++) + kfree(priv->htb_qos_sq_stats[i]); + kvfree(priv->htb_qos_sq_stats); + + memset(priv, 0, sizeof(*priv)); +} + +static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + unsigned int nch, ptp_txqs, qos_txqs; + + nch = mlx5e_profile_max_num_channels(mdev, profile); + + ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) && + mlx5e_profile_feature_cap(profile, PTP_TX) ? + profile->max_tc : 0; + + qos_txqs = mlx5_qos_is_supported(mdev) && + mlx5e_profile_feature_cap(profile, QOS_HTB) ? + mlx5e_qos_max_leaf_nodes(mdev) : 0; + + return nch * profile->max_tc + ptp_txqs + qos_txqs; +} + +static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + return mlx5e_profile_max_num_channels(mdev, profile); +} + +struct net_device * +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile) +{ + struct net_device *netdev; + unsigned int txqs, rxqs; + int err; + + txqs = mlx5e_get_max_num_txqs(mdev, profile); + rxqs = mlx5e_get_max_num_rxqs(mdev, profile); + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; + } + + err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); + goto err_free_netdev; + } + + netif_carrier_off(netdev); + netif_tx_disable(netdev); + dev_net_set(netdev, mlx5_core_net(mdev)); + + return netdev; + +err_free_netdev: + free_netdev(netdev); + + return NULL; +} + +static void mlx5e_update_features(struct net_device *netdev) +{ + if (netdev->reg_state != NETREG_REGISTERED) + return; /* features will be updated on netdev registration */ + + rtnl_lock(); + netdev_update_features(netdev); + rtnl_unlock(); +} + +static void mlx5e_reset_channels(struct net_device *netdev) +{ + netdev_reset_tc(netdev); +} + +int mlx5e_attach_netdev(struct mlx5e_priv *priv) +{ + const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; + const struct mlx5e_profile *profile = priv->profile; + int max_nch; + int err; + + clear_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + + /* Validate the max_wqe_size_sq capability. */ + if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) { + mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %u\n", + mlx5e_get_max_sq_wqebbs(priv->mdev), (unsigned int)MLX5E_MAX_TX_WQEBBS); + return -EIO; + } + + /* max number of channels may have changed */ + max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile); + if (priv->channels.params.num_channels > max_nch) { + mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); + /* Reducing the number of channels - RXFH has to be reset, and + * mlx5e_num_channels_changed below will build the RQT. + */ + priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED; + priv->channels.params.num_channels = max_nch; + if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) { + mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n"); + mlx5e_params_mqprio_reset(&priv->channels.params); + } + } + if (max_nch != priv->max_nch) { + mlx5_core_warn(priv->mdev, + "MLX5E: Updating max number of channels from %u to %u\n", + priv->max_nch, max_nch); + priv->max_nch = max_nch; + } + + /* 1. Set the real number of queues in the kernel the first time. + * 2. Set our default XPS cpumask. + * 3. Build the RQT. + * + * rtnl_lock is required by netif_set_real_num_*_queues in case the + * netdev has been registered by this point (if this function was called + * in the reload or resume flow). + */ + if (take_rtnl) + rtnl_lock(); + err = mlx5e_num_channels_changed(priv); + if (take_rtnl) + rtnl_unlock(); + if (err) + goto out; + + err = profile->init_tx(priv); + if (err) + goto out; + + err = profile->init_rx(priv); + if (err) + goto err_cleanup_tx; + + if (profile->enable) + profile->enable(priv); + + mlx5e_update_features(priv->netdev); + + return 0; + +err_cleanup_tx: + profile->cleanup_tx(priv); + +out: + mlx5e_reset_channels(priv->netdev); + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + cancel_work_sync(&priv->update_stats_work); + return err; +} + +void mlx5e_detach_netdev(struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (priv->fs) + mlx5e_fs_set_state_destroy(priv->fs, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + + if (profile->disable) + profile->disable(priv); + flush_workqueue(priv->wq); + + profile->cleanup_rx(priv); + profile->cleanup_tx(priv); + mlx5e_reset_channels(priv->netdev); + cancel_work_sync(&priv->update_stats_work); +} + +static int +mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + err = mlx5e_priv_init(priv, new_profile, netdev, mdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); + return err; + } + netif_carrier_off(netdev); + priv->profile = new_profile; + priv->ppriv = new_ppriv; + err = new_profile->init(priv->mdev, priv->netdev); + if (err) + goto priv_cleanup; + + return 0; + +priv_cleanup: + mlx5e_priv_cleanup(priv); + return err; +} + +static int +mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); + if (err) + return err; + + err = mlx5e_attach_netdev(priv); + if (err) + goto profile_cleanup; + return err; + +profile_cleanup: + new_profile->cleanup(priv); + mlx5e_priv_cleanup(priv); + return err; +} + +int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + const struct mlx5e_profile *orig_profile = priv->profile; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + void *orig_ppriv = priv->ppriv; + int err, rollback_err; + + /* cleanup old profile */ + mlx5e_detach_netdev(priv); + priv->profile->cleanup(priv); + mlx5e_priv_cleanup(priv); + + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + return -EIO; + } + + err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); + if (err) { /* roll back to original profile */ + netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); + goto rollback; + } + + return 0; + +rollback: + rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv); + if (rollback_err) + netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n", + __func__, rollback_err); + return err; +} + +void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv) +{ + mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL); +} + +void mlx5e_destroy_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + mlx5e_priv_cleanup(priv); + free_netdev(netdev); +} + +static int mlx5e_resume(struct auxiliary_device *adev) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); + struct mlx5e_priv *priv = mlx5e_dev->priv; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = edev->mdev; + int err; + + if (netif_device_present(netdev)) + return 0; + + err = mlx5e_create_mdev_resources(mdev); + if (err) + return err; + + err = mlx5e_attach_netdev(priv); + if (err) { + mlx5e_destroy_mdev_resources(mdev); + return err; + } + + return 0; +} + +static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) +{ + struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); + struct mlx5e_priv *priv = mlx5e_dev->priv; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!netif_device_present(netdev)) { + if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5e_destroy_mdev_resources(mdev); + return -ENODEV; + } + + mlx5e_detach_netdev(priv); + mlx5e_destroy_mdev_resources(mdev); + return 0; +} + +static int mlx5e_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + const struct mlx5e_profile *profile = &mlx5e_nic_profile; + struct mlx5_core_dev *mdev = edev->mdev; + struct mlx5e_dev *mlx5e_dev; + struct net_device *netdev; + pm_message_t state = {}; + struct mlx5e_priv *priv; + int err; + + mlx5e_dev = mlx5e_create_devlink(&adev->dev, mdev); + if (IS_ERR(mlx5e_dev)) + return PTR_ERR(mlx5e_dev); + auxiliary_set_drvdata(adev, mlx5e_dev); + + err = mlx5e_devlink_port_register(mlx5e_dev, mdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_devlink_unregister; + } + + netdev = mlx5e_create_netdev(mdev, profile); + if (!netdev) { + mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); + err = -ENOMEM; + goto err_devlink_port_unregister; + } + SET_NETDEV_DEVLINK_PORT(netdev, &mlx5e_dev->dl_port); + + mlx5e_build_nic_netdev(netdev); + + priv = netdev_priv(netdev); + mlx5e_dev->priv = priv; + + priv->profile = profile; + priv->ppriv = NULL; + + err = profile->init(mdev, netdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); + goto err_destroy_netdev; + } + + err = mlx5e_resume(adev); + if (err) { + mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err); + goto err_profile_cleanup; + } + + err = register_netdev(netdev); + if (err) { + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_resume; + } + + mlx5e_dcbnl_init_app(priv); + mlx5_core_uplink_netdev_set(mdev, netdev); + mlx5e_params_print_info(mdev, &priv->channels.params); + return 0; + +err_resume: + mlx5e_suspend(adev, state); +err_profile_cleanup: + profile->cleanup(priv); +err_destroy_netdev: + mlx5e_destroy_netdev(priv); +err_devlink_port_unregister: + mlx5e_devlink_port_unregister(mlx5e_dev); +err_devlink_unregister: + mlx5e_destroy_devlink(mlx5e_dev); + return err; +} + +static void mlx5e_remove(struct auxiliary_device *adev) +{ + struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); + struct mlx5e_priv *priv = mlx5e_dev->priv; + pm_message_t state = {}; + + mlx5_core_uplink_netdev_set(priv->mdev, NULL); + mlx5e_dcbnl_delete_app(priv); + unregister_netdev(priv->netdev); + mlx5e_suspend(adev, state); + priv->profile->cleanup(priv); + mlx5e_destroy_netdev(priv); + mlx5e_devlink_port_unregister(mlx5e_dev); + mlx5e_destroy_devlink(mlx5e_dev); +} + +static const struct auxiliary_device_id mlx5e_id_table[] = { + { .name = MLX5_ADEV_NAME ".eth", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table); + +static struct auxiliary_driver mlx5e_driver = { + .name = "eth", + .probe = mlx5e_probe, + .remove = mlx5e_remove, + .suspend = mlx5e_suspend, + .resume = mlx5e_resume, + .id_table = mlx5e_id_table, +}; + +int mlx5e_init(void) +{ + int ret; + + mlx5e_build_ptys2ethtool_map(); + ret = auxiliary_driver_register(&mlx5e_driver); + if (ret) + return ret; + + ret = mlx5e_rep_init(); + if (ret) + auxiliary_driver_unregister(&mlx5e_driver); + return ret; +} + +void mlx5e_cleanup(void) +{ + mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c new file mode 100644 index 0000000000..751d3ffcd2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -0,0 +1,1728 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "eswitch.h" +#include "en.h" +#include "en_rep.h" +#include "en/params.h" +#include "en/txrx.h" +#include "en_tc.h" +#include "en/rep/tc.h" +#include "en/rep/neigh.h" +#include "en/rep/bridge.h" +#include "en/devlink.h" +#include "fs_core.h" +#include "lib/mlx5.h" +#include "lib/devcom.h" +#include "lib/vxlan.h" +#define CREATE_TRACE_POINTS +#include "diag/en_rep_tracepoint.h" +#include "diag/reporter_vnic.h" +#include "en_accel/ipsec.h" +#include "en/tc/int_port.h" +#include "en/ptp.h" +#include "en/fs_ethtool.h" + +#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ + max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 + +static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; + +static void mlx5e_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int count; + + strscpy(drvinfo->driver, mlx5e_rep_driver_name, + sizeof(drvinfo->driver)); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count >= sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); +} + +static const struct counter_desc sw_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, +}; + +static const struct counter_desc vport_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + rx_vport_rdma_unicast_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, rx_vport_rdma_unicast_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + tx_vport_rdma_unicast_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, tx_vport_rdma_unicast_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + rx_vport_rdma_multicast_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + rx_vport_rdma_multicast_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + tx_vport_rdma_multicast_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + tx_vport_rdma_multicast_bytes) }, +}; + +#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) +#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw_rep) +{ + return NUM_VPORT_REP_SW_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + sw_rep_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct rtnl_link_stats64 stats64 = {}; + + memset(s, 0, sizeof(*s)); + mlx5e_fold_sw_stats64(priv, &stats64); + + s->rx_packets = stats64.rx_packets; + s->rx_bytes = stats64.rx_bytes; + s->tx_packets = stats64.tx_packets; + s->tx_bytes = stats64.tx_bytes; + s->tx_queue_dropped = stats64.tx_dropped; +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport_rep) +{ + return NUM_VPORT_REP_HW_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_rep_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, + vport_rep_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) +{ + struct mlx5e_rep_stats *rep_stats = &priv->stats.rep_stats; + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return; + + err = mlx5_core_query_vport_counter(esw->dev, 1, rep->vport - 1, 0, out); + if (err) { + netdev_warn(priv->netdev, "vport %d error %d reading stats\n", + rep->vport, err); + goto out; + } + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + /* flip tx/rx as we are reporting the counters for the switch vport */ + rep_stats->vport_rx_packets = + MLX5_GET_CTR(out, transmitted_ib_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + rep_stats->vport_tx_packets = + MLX5_GET_CTR(out, received_ib_unicast.packets) + + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + rep_stats->vport_rx_bytes = + MLX5_GET_CTR(out, transmitted_ib_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + rep_stats->vport_tx_bytes = + MLX5_GET_CTR(out, received_ib_unicast.octets) + + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_ib_multicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + rep_stats->rx_vport_rdma_unicast_packets = + MLX5_GET_CTR(out, transmitted_ib_unicast.packets); + rep_stats->tx_vport_rdma_unicast_packets = + MLX5_GET_CTR(out, received_ib_unicast.packets); + rep_stats->rx_vport_rdma_unicast_bytes = + MLX5_GET_CTR(out, transmitted_ib_unicast.octets); + rep_stats->tx_vport_rdma_unicast_bytes = + MLX5_GET_CTR(out, received_ib_unicast.octets); + rep_stats->rx_vport_rdma_multicast_packets = + MLX5_GET_CTR(out, transmitted_ib_multicast.packets); + rep_stats->tx_vport_rdma_multicast_packets = + MLX5_GET_CTR(out, received_ib_multicast.packets); + rep_stats->rx_vport_rdma_multicast_bytes = + MLX5_GET_CTR(out, transmitted_ib_multicast.octets); + rep_stats->tx_vport_rdma_multicast_bytes = + MLX5_GET_CTR(out, received_ib_multicast.octets); + +out: + kvfree(out); +} + +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (stringset) { + case ETH_SS_STATS: + mlx5e_stats_fill_strings(priv, data); + break; + } +} + +static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return mlx5e_stats_total_num(priv); + default: + return -EOPNOTSUPP; + } +} + +static void +mlx5e_rep_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); +} + +static int +mlx5e_rep_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +static void mlx5e_rep_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +static int mlx5e_rep_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_channels(priv, ch); +} + +static int mlx5e_rep_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); +} + +static int mlx5e_rep_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); +} + +static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_key_size(priv); +} + +static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_indir_size(priv); +} + +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, + .get_ringparam = mlx5e_rep_get_ringparam, + .set_ringparam = mlx5e_rep_set_ringparam, + .get_channels = mlx5e_rep_get_channels, + .set_channels = mlx5e_rep_set_channels, + .get_coalesce = mlx5e_rep_get_coalesce, + .set_coalesce = mlx5e_rep_set_coalesce, + .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, +}; + +static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_sq *rep_sq, *tmp; + struct mlx5e_rep_sq_peer *sq_peer; + struct mlx5e_rep_priv *rpriv; + unsigned long i; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) { + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + xa_for_each(&rep_sq->sq_peer, i, sq_peer) { + if (sq_peer->rule) + mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule); + + xa_erase(&rep_sq->sq_peer, i); + kfree(sq_peer); + } + + xa_destroy(&rep_sq->sq_peer); + list_del(&rep_sq->list); + kfree(rep_sq); + } +} + +static int mlx5e_sqs2vport_add_peers_rules(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, + struct mlx5e_rep_sq *rep_sq, int i) +{ + struct mlx5_flow_handle *flow_rule; + struct mlx5_devcom_comp_dev *tmp; + struct mlx5_eswitch *peer_esw; + + mlx5_devcom_for_each_peer_entry(esw->devcom, peer_esw, tmp) { + u16 peer_rule_idx = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + struct mlx5e_rep_sq_peer *sq_peer; + int err; + + sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL); + if (!sq_peer) + return -ENOMEM; + + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, + rep, rep_sq->sqn); + if (IS_ERR(flow_rule)) { + kfree(sq_peer); + return PTR_ERR(flow_rule); + } + + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; + err = xa_insert(&rep_sq->sq_peer, peer_rule_idx, sq_peer, GFP_KERNEL); + if (err) { + kfree(sq_peer); + mlx5_eswitch_del_send_to_vport_rule(flow_rule); + return err; + } + } + + return 0; +} + +static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u32 *sqns_array, int sqns_num) +{ + struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + bool devcom_locked = false; + int err; + int i; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + rpriv = mlx5e_rep_to_rep_priv(rep); + + if (mlx5_devcom_comp_is_ready(esw->devcom) && + mlx5_devcom_for_each_peer_begin(esw->devcom)) + devcom_locked = true; + + for (i = 0; i < sqns_num; i++) { + rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL); + if (!rep_sq) { + err = -ENOMEM; + goto out_err; + } + + /* Add re-inject rule to the PF/representor sqs */ + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, + sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + kfree(rep_sq); + goto out_err; + } + rep_sq->send_to_vport_rule = flow_rule; + rep_sq->sqn = sqns_array[i]; + + xa_init(&rep_sq->sq_peer); + if (devcom_locked) { + err = mlx5e_sqs2vport_add_peers_rules(esw, rep, rep_sq, i); + if (err) { + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + xa_destroy(&rep_sq->sq_peer); + kfree(rep_sq); + goto out_err; + } + } + + list_add(&rep_sq->list, &rpriv->vport_sqs_list); + } + + if (devcom_locked) + mlx5_devcom_for_each_peer_end(esw->devcom); + + return 0; + +out_err: + mlx5e_sqs2vport_stop(esw, rep); + + if (devcom_locked) + mlx5_devcom_for_each_peer_end(esw->devcom); + + return err; +} + +static int +mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + int sqs_per_channel = mlx5e_get_dcb_num_tc(&priv->channels.params); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + bool is_uplink_rep = mlx5e_is_uplink_rep(priv); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int n, tc, nch, num_sqs = 0; + struct mlx5e_channel *c; + int err = -ENOMEM; + bool ptp_sq; + u32 *sqs; + + ptp_sq = !!(priv->channels.ptp && + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)); + nch = priv->channels.num + ptp_sq; + /* +2 for xdpsqs, they don't exist on the ptp channel but will not be + * counted for by num_sqs. + */ + if (is_uplink_rep) + sqs_per_channel += 2; + + sqs = kvcalloc(nch * sqs_per_channel, sizeof(*sqs), GFP_KERNEL); + if (!sqs) + goto out; + + for (n = 0; n < priv->channels.num; n++) { + c = priv->channels.c[n]; + for (tc = 0; tc < c->num_tc; tc++) + sqs[num_sqs++] = c->sq[tc].sqn; + + if (is_uplink_rep) { + if (c->xdp) + sqs[num_sqs++] = c->rq_xdpsq.sqn; + + sqs[num_sqs++] = c->xdpsq.sqn; + } + } + if (ptp_sq) { + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + + for (tc = 0; tc < ptp_ch->num_tc; tc++) + sqs[num_sqs++] = ptp_ch->ptpsq[tc].txqsq.sqn; + } + + err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs); + kvfree(sqs); + +out: + if (err) + netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err); + return err; +} + +static void +mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + mlx5e_sqs2vport_stop(esw, rep); +} + +static int +mlx5e_rep_add_meta_tunnel_rule(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_group *g; + + g = esw->fdb_table.offloads.send_to_vport_meta_grp; + if (!g) + return 0; + + flow_rule = mlx5_eswitch_add_send_to_vport_meta_rule(esw, rep->vport); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + + rpriv->send_to_vport_meta_rule = flow_rule; + + return 0; +} + +static void +mlx5e_rep_del_meta_tunnel_rule(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (rpriv->send_to_vport_meta_rule) + mlx5_eswitch_del_send_to_vport_meta_rule(rpriv->send_to_vport_meta_rule); +} + +void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) +{ + mlx5e_add_sqs_fwd_rules(priv); + mlx5e_rep_add_meta_tunnel_rule(priv); +} + +void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) +{ + mlx5e_rep_del_meta_tunnel_rule(priv); + mlx5e_remove_sqs_fwd_rules(priv); +} + +static int mlx5e_rep_open(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(dev); + if (err) + goto unlock; + + if (!mlx5_modify_vport_admin_state(priv->mdev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_UP)) + netif_carrier_on(dev); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_rep_close(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int ret; + + mutex_lock(&priv->state_lock); + mlx5_modify_vport_admin_state(priv->mdev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); + ret = mlx5e_close_locked(dev); + mutex_unlock(&priv->state_lock); + return ret; +} + +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + + if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + return false; + + if (!rpriv) /* non vport rep mlx5e instances don't use this field */ + return false; + + rep = rpriv->rep; + return (rep->vport == MLX5_VPORT_UPLINK); +} + +bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return true; + } + + return false; +} + +static int +mlx5e_get_sw_stats64(const struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_fold_sw_stats64(priv, stats); + return 0; +} + +int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return mlx5e_get_sw_stats64(dev, sp); + } + + return -EINVAL; +} + +static void +mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + /* update HW stats in background for next time */ + mlx5e_queue_update_stats(priv); + mlx5e_stats_copy_rep_stats(stats, &priv->stats.rep_stats); +} + +static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, NULL); +} + +static int mlx5e_rep_change_carrier(struct net_device *dev, bool new_carrier) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + int err; + + if (new_carrier) { + err = mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, 1, MLX5_VPORT_ADMIN_STATE_UP); + if (err) + return err; + netif_carrier_on(dev); + } else { + err = mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + rep->vport, 1, MLX5_VPORT_ADMIN_STATE_DOWN); + if (err) + return err; + netif_carrier_off(dev); + } + return 0; +} + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_rep_open, + .ndo_stop = mlx5e_rep_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_stats64 = mlx5e_rep_get_stats, + .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, + .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, + .ndo_change_mtu = mlx5e_rep_change_mtu, + .ndo_change_carrier = mlx5e_rep_change_carrier, +}; + +bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev) +{ + return netdev->netdev_ops == &mlx5e_netdev_ops && + mlx5e_is_uplink_rep(netdev_priv(netdev)); +} + +bool mlx5e_eswitch_vf_rep(const struct net_device *netdev) +{ + return netdev->netdev_ops == &mlx5e_netdev_ops_rep; +} + +/* One indirect TIR set for outer. Inner not supported in reps. */ +#define REP_NUM_INDIR_TIRS MLX5E_NUM_INDIR_TIRS + +static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) +{ + int max_tir_num = 1 << MLX5_CAP_GEN(mdev, log_max_tir); + int num_vports = mlx5_eswitch_get_total_vports(mdev); + + return (max_tir_num - mlx5e_get_pf_num_tirs(mdev) + - (num_vports * REP_NUM_INDIR_TIRS)) / num_vports; +} + +static void mlx5e_build_rep_params(struct net_device *netdev) +{ + const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params *params; + + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + + params = &priv->channels.params; + + params->num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; + params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->sw_mtu = netdev->mtu; + + /* SQ */ + if (rep->vport == MLX5_VPORT_UPLINK) + params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + else + params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; + + /* RQ */ + mlx5e_build_rq_params(mdev, params); + + /* If netdev is already registered (e.g. move from nic profile to uplink, + * RTNL lock must be held before triggering netdev notifiers. + */ + if (take_rtnl) + rtnl_lock(); + /* update XDP supported features */ + mlx5e_set_xdp_feature(netdev); + if (take_rtnl) + rtnl_unlock(); + + /* CQ moderation params */ + params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); + + params->mqprio.num_tc = 1; + if (rep->vport != MLX5_VPORT_UPLINK) + params->vlan_strip_disable = true; + + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); +} + +static void mlx5e_build_rep_netdev(struct net_device *netdev, + struct mlx5_core_dev *mdev) +{ + SET_NETDEV_DEV(netdev, mdev->device); + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + eth_hw_addr_random(netdev); + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; + + netdev->watchdog_timeo = 15 * HZ; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + netdev->hw_features |= NETIF_F_HW_TC; +#endif + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + + netdev->features |= netdev->hw_features; + netdev->features |= NETIF_F_NETNS_LOCAL; +} + +static int mlx5e_init_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->fs = + mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + + mlx5e_build_rep_params(netdev); + mlx5e_timestamp_init(priv); + + return 0; +} + +static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->dfs_root = debugfs_create_dir("nic", + mlx5_debugfs_get_dev_root(mdev)); + + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + debugfs_remove_recursive(priv->dfs_root); + return -ENOMEM; + } + + mlx5e_vxlan_set_netdev_info(priv); + mlx5e_build_rep_params(netdev); + mlx5e_timestamp_init(priv); + return 0; +} + +static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) +{ + mlx5e_fs_cleanup(priv->fs); + debugfs_remove_recursive(priv->dfs_root); + priv->fs = NULL; +} + +static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct ttc_params ttc_params = {}; + int err; + + mlx5e_fs_set_ns(priv->fs, + mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL), false); + + /* The inner_ttc in the ttc params is intentionally not set */ + mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false); + + if (rep->vport != MLX5_VPORT_UPLINK) + /* To give uplik rep TTC a lower level for chaining from root ft */ + ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; + + mlx5e_fs_set_ttc(priv->fs, mlx5_create_ttc_table(priv->mdev, &ttc_params), false); + if (IS_ERR(mlx5e_fs_get_ttc(priv->fs, false))) { + err = PTR_ERR(mlx5e_fs_get_ttc(priv->fs, false)); + netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", + err); + return err; + } + return 0; +} + +static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err = 0; + + if (rep->vport != MLX5_VPORT_UPLINK) { + /* non uplik reps will skip any bypass tables and go directly to + * their own ttc + */ + rpriv->root_ft = mlx5_get_ttc_flow_table(mlx5e_fs_get_ttc(priv->fs, false)); + return 0; + } + + /* uplink root ft will be used to auto chain, to ethtool or ttc tables */ + ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + netdev_err(priv->netdev, "Failed to get reps offloads namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.max_fte = 0; /* Empty table, miss rule will always point to next table */ + ft_attr.prio = 1; + ft_attr.level = 1; + + rpriv->root_ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(rpriv->root_ft)) { + err = PTR_ERR(rpriv->root_ft); + rpriv->root_ft = NULL; + } + + return err; +} + +static void mlx5e_destroy_rep_root_ft(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + if (rep->vport != MLX5_VPORT_UPLINK) + return; + mlx5_destroy_flow_table(rpriv->root_ft); +} + +static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rpriv->root_ft; + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, &dest); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + rpriv->vport_rx_rule = flow_rule; + return 0; +} + +static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (!rpriv->vport_rx_rule) + return; + + mlx5_del_flow_rules(rpriv->vport_rx_rule); + rpriv->vport_rx_rule = NULL; +} + +int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) +{ + rep_vport_rx_rule_destroy(priv); + + return cleanup ? 0 : mlx5e_create_rep_vport_rx_rule(priv); +} + +static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) { + err = -ENOMEM; + goto err_free_fs; + } + + mlx5e_fs_init_l2_addr(priv->fs, priv->netdev); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_rx_res_free; + } + + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, + priv->channels.params.num_channels); + if (err) + goto err_close_drop_rq; + + err = mlx5e_create_rep_ttc_table(priv); + if (err) + goto err_destroy_rx_res; + + err = mlx5e_create_rep_root_ft(priv); + if (err) + goto err_destroy_ttc_table; + + err = mlx5e_create_rep_vport_rx_rule(priv); + if (err) + goto err_destroy_root_ft; + + mlx5e_ethtool_init_steering(priv->fs); + + return 0; + +err_destroy_root_ft: + mlx5e_destroy_rep_root_ft(priv); +err_destroy_ttc_table: + mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false)); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); +err_rx_res_free: + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; +err_free_fs: + mlx5e_fs_cleanup(priv->fs); + priv->fs = NULL; + return err; +} + +static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + mlx5e_ethtool_cleanup_steering(priv->fs); + rep_vport_rx_rule_destroy(priv); + mlx5e_destroy_rep_root_ft(priv); + mlx5_destroy_ttc_table(mlx5e_fs_get_ttc(priv->fs, false)); + mlx5e_rx_res_destroy(priv->rx_res); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; +} + +static void mlx5e_rep_mpesw_work(struct work_struct *work) +{ + struct mlx5_rep_uplink_priv *uplink_priv = + container_of(work, struct mlx5_rep_uplink_priv, + mpesw_work); + struct mlx5e_rep_priv *rpriv = + container_of(uplink_priv, struct mlx5e_rep_priv, + uplink_priv); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + rep_vport_rx_rule_destroy(priv); + mlx5e_create_rep_vport_rx_rule(priv); +} + +static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + mlx5e_create_q_counters(priv); + err = mlx5e_init_rep_rx(priv); + if (err) + goto out; + + mlx5e_tc_int_port_init_rep_rx(priv); + + INIT_WORK(&rpriv->uplink_priv.mpesw_work, mlx5e_rep_mpesw_work); + +out: + return err; +} + +static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + cancel_work_sync(&rpriv->uplink_priv.mpesw_work); + mlx5e_tc_int_port_cleanup_rep_rx(priv); + mlx5e_cleanup_rep_rx(priv); + mlx5e_destroy_q_counters(priv); +} + +static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct net_device *netdev; + struct mlx5e_priv *priv; + int err; + + netdev = rpriv->netdev; + priv = netdev_priv(netdev); + uplink_priv = &rpriv->uplink_priv; + + err = mlx5e_rep_tc_init(rpriv); + if (err) + return err; + + mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); + + mlx5e_rep_bond_init(rpriv); + err = mlx5e_rep_tc_netdevice_event_register(rpriv); + if (err) { + mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n", + err); + goto err_event_reg; + } + + return 0; + +err_event_reg: + mlx5e_rep_bond_cleanup(rpriv); + mlx5e_rep_tc_cleanup(rpriv); + return err; +} + +static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + mlx5e_rep_tc_netdevice_event_unregister(rpriv); + mlx5e_rep_bond_cleanup(rpriv); + mlx5e_rep_tc_cleanup(rpriv); +} + +static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_neigh_init; + + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { + err = mlx5e_init_uplink_rep_tx(rpriv); + if (err) + goto err_init_tx; + } + + err = mlx5e_tc_ht_init(&rpriv->tc_ht); + if (err) + goto err_ht_init; + + return 0; + +err_ht_init: + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) + mlx5e_cleanup_uplink_rep_tx(rpriv); +err_init_tx: + mlx5e_rep_neigh_cleanup(rpriv); +err_neigh_init: + mlx5e_destroy_tises(priv); + return err; +} + +static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + mlx5e_tc_ht_cleanup(&rpriv->tc_ht); + + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) + mlx5e_cleanup_uplink_rep_tx(rpriv); + + mlx5e_rep_neigh_cleanup(rpriv); + mlx5e_destroy_tises(priv); +} + +static void mlx5e_rep_enable(struct mlx5e_priv *priv) +{ + mlx5e_set_netdev_mtu_boundaries(priv); +} + +static void mlx5e_rep_disable(struct mlx5e_priv *priv) +{ +} + +static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) +{ + return 0; +} + +static int mlx5e_rep_event_mpesw(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + if (rep->vport != MLX5_VPORT_UPLINK) + return NOTIFY_DONE; + + queue_work(priv->wq, &rpriv->uplink_priv.mpesw_work); + + return NOTIFY_OK; +} + +static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; + } + + if (event == MLX5_DEV_EVENT_PORT_AFFINITY) + return mlx5e_rep_tc_event_port_affinity(priv); + else if (event == MLX5_DEV_EVENT_MULTIPORT_ESW) + return mlx5e_rep_event_mpesw(priv); + + return NOTIFY_DONE; +} + +static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; + + mlx5e_ipsec_init(priv); + + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + mlx5e_set_dev_port_mtu(priv); + + mlx5e_rep_tc_enable(priv); + + if (MLX5_CAP_GEN(mdev, uplink_follow)) + mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, + 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); + mlx5_lag_add_netdev(mdev, netdev); + priv->events_nb.notifier_call = uplink_rep_async_event; + mlx5_notifier_register(mdev, &priv->events_nb); + mlx5e_dcbnl_initialize(priv); + mlx5e_dcbnl_init_app(priv); + mlx5e_rep_bridge_init(priv); + + netdev->wanted_features |= NETIF_F_HW_TC; + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + udp_tunnel_nic_reset_ntf(priv->netdev); + netif_device_attach(netdev); + rtnl_unlock(); +} + +static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + + mlx5e_rep_bridge_cleanup(priv); + mlx5e_dcbnl_delete_app(priv); + mlx5_notifier_unregister(mdev, &priv->events_nb); + mlx5e_rep_tc_disable(priv); + mlx5_lag_remove_netdev(mdev, priv->netdev); + mlx5_vxlan_reset_to_default(mdev->vxlan); + + mlx5e_ipsec_cleanup(priv); +} + +static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); +static MLX5E_DEFINE_STATS_GRP(vport_rep, MLX5E_NDO_UPDATE_STATS); + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5e_rep_stats_grps[] = { + &MLX5E_STATS_GRP(sw_rep), + &MLX5E_STATS_GRP(vport_rep), +}; + +static unsigned int mlx5e_rep_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_rep_stats_grps); +} + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(eth_ext), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), +#ifdef CONFIG_MLX5_EN_IPSEC + &MLX5E_STATS_GRP(ipsec_hw), + &MLX5E_STATS_GRP(ipsec_sw), +#endif + &MLX5E_STATS_GRP(ptp), +}; + +static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_ul_rep_stats_grps); +} + +static int +mlx5e_rep_vnic_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = devlink_health_reporter_priv(reporter); + struct mlx5_eswitch_rep *rep = rpriv->rep; + + return mlx5_reporter_vnic_diagnose_counters(rep->esw->dev, fmsg, + rep->vport, true); +} + +static const struct devlink_health_reporter_ops mlx5_rep_vnic_reporter_ops = { + .name = "vnic", + .diagnose = mlx5e_rep_vnic_reporter_diagnose, +}; + +static void mlx5e_rep_vnic_reporter_create(struct mlx5e_priv *priv, + struct devlink_port *dl_port) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct devlink_health_reporter *reporter; + + reporter = devl_port_health_reporter_create(dl_port, + &mlx5_rep_vnic_reporter_ops, + 0, rpriv); + if (IS_ERR(reporter)) { + mlx5_core_err(priv->mdev, + "Failed to create representor vnic reporter, err = %ld\n", + PTR_ERR(reporter)); + return; + } + + rpriv->rep_vnic_reporter = reporter; +} + +static void mlx5e_rep_vnic_reporter_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (!IS_ERR_OR_NULL(rpriv->rep_vnic_reporter)) + devl_health_reporter_destroy(rpriv->rep_vnic_reporter); +} + +static const struct mlx5e_profile mlx5e_rep_profile = { + .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_rep_enable, + .disable = mlx5e_rep_disable, + .update_rx = mlx5e_update_rep_rx, + .update_stats = mlx5e_stats_update_ndo_stats, + .rx_handlers = &mlx5e_rx_handlers_rep, + .max_tc = 1, + .stats_grps = mlx5e_rep_stats_grps, + .stats_grps_num = mlx5e_rep_stats_grps_num, + .max_nch_limit = mlx5e_rep_max_nch_limit, +}; + +static const struct mlx5e_profile mlx5e_uplink_rep_profile = { + .init = mlx5e_init_ul_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_ul_rep_rx, + .cleanup_rx = mlx5e_cleanup_ul_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_uplink_rep_enable, + .disable = mlx5e_uplink_rep_disable, + .update_rx = mlx5e_update_rep_rx, + .update_stats = mlx5e_stats_update_ndo_stats, + .update_carrier = mlx5e_update_carrier, + .rx_handlers = &mlx5e_rx_handlers_rep, + .max_tc = MLX5E_MAX_NUM_TC, + .stats_grps = mlx5e_ul_rep_stats_grps, + .stats_grps_num = mlx5e_ul_rep_stats_grps_num, +}; + +/* e-Switch vport representors */ +static int +mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev)); + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + + rpriv->netdev = priv->netdev; + return mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile, + rpriv); +} + +static void +mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv) +{ + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + + mlx5e_netdev_attach_nic_profile(priv); +} + +static int +mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + const struct mlx5e_profile *profile; + struct devlink_port *dl_port; + struct net_device *netdev; + struct mlx5e_priv *priv; + int err; + + profile = &mlx5e_rep_profile; + netdev = mlx5e_create_netdev(dev, profile); + if (!netdev) { + mlx5_core_warn(dev, + "Failed to create representor netdev for vport %d\n", + rep->vport); + return -EINVAL; + } + + mlx5e_build_rep_netdev(netdev, dev); + rpriv->netdev = netdev; + + priv = netdev_priv(netdev); + priv->profile = profile; + priv->ppriv = rpriv; + err = profile->init(dev, netdev); + if (err) { + netdev_warn(netdev, "rep profile init failed, %d\n", err); + goto err_destroy_netdev; + } + + err = mlx5e_attach_netdev(netdev_priv(netdev)); + if (err) { + netdev_warn(netdev, + "Failed to attach representor netdev for vport %d\n", + rep->vport); + goto err_cleanup_profile; + } + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, + rpriv->rep->vport); + if (!IS_ERR(dl_port)) { + SET_NETDEV_DEVLINK_PORT(netdev, dl_port); + mlx5e_rep_vnic_reporter_create(priv, dl_port); + } + + err = register_netdev(netdev); + if (err) { + netdev_warn(netdev, + "Failed to register representor netdev for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + + return 0; + +err_detach_netdev: + mlx5e_rep_vnic_reporter_destroy(priv); + mlx5e_detach_netdev(netdev_priv(netdev)); +err_cleanup_profile: + priv->profile->cleanup(priv); + +err_destroy_netdev: + mlx5e_destroy_netdev(netdev_priv(netdev)); + return err; +} + +static int +mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + int err; + + rpriv = kvzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + /* rpriv->rep to be looked up when profile->init() is called */ + rpriv->rep = rep; + rep->rep_data[REP_ETH].priv = rpriv; + INIT_LIST_HEAD(&rpriv->vport_sqs_list); + + if (rep->vport == MLX5_VPORT_UPLINK) + err = mlx5e_vport_uplink_rep_load(dev, rep); + else + err = mlx5e_vport_vf_rep_load(dev, rep); + + if (err) + kvfree(rpriv); + + return err; +} + +static void +mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct net_device *netdev = rpriv->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + void *ppriv = priv->ppriv; + + if (rep->vport == MLX5_VPORT_UPLINK) { + mlx5e_vport_uplink_rep_unload(rpriv); + goto free_ppriv; + } + + unregister_netdev(netdev); + mlx5e_rep_vnic_reporter_destroy(priv); + mlx5e_detach_netdev(priv); + priv->profile->cleanup(priv); + mlx5e_destroy_netdev(priv); +free_ppriv: + kvfree(ppriv); /* mlx5e_rep_priv */ +} + +static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + + rpriv = mlx5e_rep_to_rep_priv(rep); + + return rpriv->netdev; +} + +static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep, + struct mlx5_eswitch *peer_esw) +{ + u16 i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + + WARN_ON_ONCE(!peer_esw); + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { + struct mlx5e_rep_sq_peer *sq_peer = xa_load(&rep_sq->sq_peer, i); + + if (!sq_peer || sq_peer->peer != peer_esw) + continue; + + mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule); + xa_erase(&rep_sq->sq_peer, i); + kfree(sq_peer); + } +} + +static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + struct mlx5_eswitch *peer_esw) +{ + u16 i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_sq_peer *sq_peer; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + int err; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { + sq_peer = xa_load(&rep_sq->sq_peer, i); + + if (sq_peer && sq_peer->peer) + continue; + + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, + rep_sq->sqn); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto err_out; + } + + if (sq_peer) { + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; + continue; + } + sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL); + if (!sq_peer) { + err = -ENOMEM; + goto err_sq_alloc; + } + err = xa_insert(&rep_sq->sq_peer, i, sq_peer, GFP_KERNEL); + if (err) + goto err_xa; + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; + } + + return 0; +err_xa: + kfree(sq_peer); +err_sq_alloc: + mlx5_eswitch_del_send_to_vport_rule(flow_rule); +err_out: + mlx5e_vport_rep_event_unpair(rep, peer_esw); + return err; +} + +static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + enum mlx5_switchdev_event event, + void *data) +{ + int err = 0; + + if (event == MLX5_SWITCHDEV_EVENT_PAIR) + err = mlx5e_vport_rep_event_pair(esw, rep, data); + else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR) + mlx5e_vport_rep_event_unpair(rep, data); + + return err; +} + +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev, + .event = mlx5e_vport_rep_event, +}; + +static int mlx5e_rep_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = edev->mdev; + struct mlx5_eswitch *esw; + + esw = mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); + return 0; +} + +static void mlx5e_rep_remove(struct auxiliary_device *adev) +{ + struct mlx5_adev *vdev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = vdev->mdev; + struct mlx5_eswitch *esw; + + esw = mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_ETH); +} + +static const struct auxiliary_device_id mlx5e_rep_id_table[] = { + { .name = MLX5_ADEV_NAME ".eth-rep", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5e_rep_id_table); + +static struct auxiliary_driver mlx5e_rep_driver = { + .name = "eth-rep", + .probe = mlx5e_rep_probe, + .remove = mlx5e_rep_remove, + .id_table = mlx5e_rep_id_table, +}; + +int mlx5e_rep_init(void) +{ + return auxiliary_driver_register(&mlx5e_rep_driver); +} + +void mlx5e_rep_cleanup(void) +{ + auxiliary_driver_unregister(&mlx5e_rep_driver); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h new file mode 100644 index 0000000000..70640fa1ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_REP_H__ +#define __MLX5E_REP_H__ + +#include +#include +#include +#include "eswitch.h" +#include "en.h" +#include "lib/port_tun.h" + +#ifdef CONFIG_MLX5_ESWITCH +extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep; + +struct mlx5e_neigh_update_table { + struct rhashtable neigh_ht; + /* Save the neigh hash entries in a list in addition to the hash table + * (neigh_ht). In order to iterate easily over the neigh entries. + * Used for stats query. + */ + struct list_head neigh_list; + /* protect lookup/remove operations */ + struct mutex encap_lock; + struct notifier_block netevent_nb; + struct delayed_work neigh_stats_work; + unsigned long min_interval; /* jiffies */ +}; + +struct mlx5_tc_ct_priv; +struct mlx5_tc_int_port_priv; +struct mlx5e_rep_bond; +struct mlx5e_tc_tun_encap; +struct mlx5e_post_act; +struct mlx5e_flow_meters; + +struct mlx5_rep_uplink_priv { + /* indirect block callbacks are invoked on bind/unbind events + * on registered higher level devices (e.g. tunnel devices) + * + * tc_indr_block_cb_priv_list is used to lookup indirect callback + * private data + * + */ + struct list_head tc_indr_block_priv_list; + + struct mlx5_tun_entropy tun_entropy; + + /* protects unready_flows */ + struct mutex unready_flows_lock; + struct list_head unready_flows; + struct work_struct reoffload_flows_work; + + /* maps tun_info to a unique id*/ + struct mapping_ctx *tunnel_mapping; + /* maps tun_enc_opts to a unique id*/ + struct mapping_ctx *tunnel_enc_opts_mapping; + + struct mlx5e_post_act *post_act; + struct mlx5_tc_ct_priv *ct_priv; + struct mlx5e_tc_psample *tc_psample; + + /* support eswitch vports bonding */ + struct mlx5e_rep_bond *bond; + + /* tc tunneling encapsulation private data */ + struct mlx5e_tc_tun_encap *encap; + + /* OVS internal port support */ + struct mlx5e_tc_int_port_priv *int_port_priv; + + struct mlx5e_flow_meters *flow_meters; + + /* tc action stats */ + struct mlx5e_tc_act_stats_handle *action_stats_handle; + + struct work_struct mpesw_work; +}; + +struct mlx5e_rep_priv { + struct mlx5_eswitch_rep *rep; + struct mlx5e_neigh_update_table neigh_update; + struct net_device *netdev; + struct mlx5_flow_table *root_ft; + struct mlx5_flow_handle *vport_rx_rule; + struct list_head vport_sqs_list; + struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ + struct rtnl_link_stats64 prev_vf_vport_stats; + struct mlx5_flow_handle *send_to_vport_meta_rule; + struct rhashtable tc_ht; + struct devlink_health_reporter *rep_vnic_reporter; +}; + +static inline +struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) +{ + return rep->rep_data[REP_ETH].priv; +} + +struct mlx5e_neigh { + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + int family; +}; + +struct mlx5e_neigh_hash_entry { + struct rhash_head rhash_node; + struct mlx5e_neigh m_neigh; + struct mlx5e_priv *priv; + struct net_device *neigh_dev; + + /* Save the neigh hash entry in a list on the representor in + * addition to the hash table. In order to iterate easily over the + * neighbour entries. Used for stats query. + */ + struct list_head neigh_list; + + /* protects encap list */ + spinlock_t encap_list_lock; + /* encap list sharing the same neigh */ + struct list_head encap_list; + + /* neigh hash entry can be deleted only when the refcount is zero. + * refcount is needed to avoid neigh hash entry removal by TC, while + * it's used by the neigh notification call. + */ + refcount_t refcnt; + + /* Save the last reported time offloaded traffic pass over one of the + * neigh hash entry flows. Use it to periodically update the neigh + * 'used' value and avoid neigh deleting by the kernel. + */ + unsigned long reported_lastuse; + + struct rcu_head rcu; +}; + +enum { + /* set when the encap entry is successfully offloaded into HW */ + MLX5_ENCAP_ENTRY_VALID = BIT(0), + MLX5_REFORMAT_DECAP = BIT(1), + MLX5_ENCAP_ENTRY_NO_ROUTE = BIT(2), +}; + +struct mlx5e_decap_key { + struct ethhdr key; +}; + +struct mlx5e_decap_entry { + struct mlx5e_decap_key key; + struct list_head flows; + struct hlist_node hlist; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct mlx5_pkt_reformat *pkt_reformat; + struct rcu_head rcu; +}; + +struct mlx5e_mpls_info { + u32 label; + u8 tc; + u8 bos; + u8 ttl; +}; + +struct mlx5e_encap_entry { + /* attached neigh hash entry */ + struct mlx5e_neigh_hash_entry *nhe; + /* neigh hash entry list of encaps sharing the same neigh */ + struct list_head encap_list; + /* a node of the eswitch encap hash table which keeping all the encap + * entries + */ + struct hlist_node encap_hlist; + struct list_head flows; + struct list_head route_list; + struct mlx5_pkt_reformat *pkt_reformat; + const struct ip_tunnel_info *tun_info; + struct mlx5e_mpls_info mpls_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int route_dev_ifindex; + struct mlx5e_tc_tunnel *tunnel; + int reformat_type; + u8 flags; + char *encap_header; + int encap_size; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct rcu_head rcu; +}; + +struct mlx5e_rep_sq_peer { + struct mlx5_flow_handle *rule; + void *peer; +}; + +struct mlx5e_rep_sq { + struct mlx5_flow_handle *send_to_vport_rule; + struct xarray sq_peer; + u32 sqn; + struct list_head list; +}; + +int mlx5e_rep_init(void); +void mlx5e_rep_cleanup(void); +int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv); +void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv); +int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, + struct net_device *lag_dev); +void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, + const struct net_device *netdev, + const struct net_device *lag_dev); +int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup); + +bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id); +int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp); + +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); +void mlx5e_rep_activate_channels(struct mlx5e_priv *priv); +void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +bool mlx5e_eswitch_vf_rep(const struct net_device *netdev); +bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev); +static inline bool mlx5e_eswitch_rep(const struct net_device *netdev) +{ + return mlx5e_eswitch_vf_rep(netdev) || + mlx5e_eswitch_uplink_rep(netdev); +} + +#else /* CONFIG_MLX5_ESWITCH */ +static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } +static inline void mlx5e_rep_activate_channels(struct mlx5e_priv *priv) {} +static inline void mlx5e_rep_deactivate_channels(struct mlx5e_priv *priv) {} +static inline int mlx5e_rep_init(void) { return 0; }; +static inline void mlx5e_rep_cleanup(void) {}; +static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev, + int attr_id) { return false; } +static inline int mlx5e_rep_get_offload_stats(int attr_id, + const struct net_device *dev, + void *sp) { return -EOPNOTSUPP; } +#endif + +static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv) +{ + return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv); +} +#endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c new file mode 100644 index 0000000000..8d9743a5e4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -0,0 +1,2774 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "en.h" +#include "en/txrx.h" +#include "en_tc.h" +#include "eswitch.h" +#include "en_rep.h" +#include "en/rep/tc.h" +#include "ipoib/ipoib.h" +#include "en_accel/ipsec.h" +#include "en_accel/macsec.h" +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/ktls_txrx.h" +#include "en/xdp.h" +#include "en/xsk/rx.h" +#include "en/health.h" +#include "en/params.h" +#include "devlink.h" +#include "en/devlink.h" + +static struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, + u32 page_idx); +static struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, + u32 page_idx); +static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = { + .handle_rx_cqe = mlx5e_handle_rx_cqe, + .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo, +}; + +static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq, + u32 cqcc, void *data) +{ + u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); + + memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64)); +} + +static void mlx5e_read_enhanced_title_slot(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_cqe64 *title = &cqd->title; + + memcpy(title, cqe, sizeof(struct mlx5_cqe64)); + + if (likely(test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state))) + return; + + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + cqd->wqe_counter = mpwrq_get_cqe_stride_index(title) + + mpwrq_get_cqe_consumed_strides(title); + else + cqd->wqe_counter = + mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, be16_to_cpu(title->wqe_counter) + 1); +} + +static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + u32 cqcc) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_cqe64 *title = &cqd->title; + + mlx5e_read_cqe_slot(wq, cqcc, title); + cqd->left = be32_to_cpu(title->byte_cnt); + cqd->wqe_counter = be16_to_cpu(title->wqe_counter); + rq->stats->cqe_compress_blks++; +} + +static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq, + struct mlx5e_cq_decomp *cqd, + u32 cqcc) +{ + mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr); + cqd->mini_arr_idx = 0; +} + +static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n) +{ + u32 cqcc = wq->cc; + u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1; + u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); + u32 wq_sz = mlx5_cqwq_get_size(wq); + u32 ci_top = min_t(u32, wq_sz, ci + n); + + for (; ci < ci_top; ci++, n--) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + + cqe->op_own = op_own; + } + + if (unlikely(ci == wq_sz)) { + op_own = !op_own; + for (ci = 0; ci < n; ci++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + + cqe->op_own = op_own; + } + } +} + +static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + u32 cqcc) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx]; + struct mlx5_cqe64 *title = &cqd->title; + + title->byte_cnt = mini_cqe->byte_cnt; + title->check_sum = mini_cqe->checksum; + title->op_own &= 0xf0; + title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz); + + /* state bit set implies linked-list striding RQ wq type and + * HW stride index capability supported + */ + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) { + title->wqe_counter = mini_cqe->stridx; + return; + } + + /* HW stride index capability not supported */ + title->wqe_counter = cpu_to_be16(cqd->wqe_counter); + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title); + else + cqd->wqe_counter = + mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1); +} + +static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + u32 cqcc) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + + mlx5e_decompress_cqe(rq, wq, cqcc); + cqd->title.rss_hash_type = 0; + cqd->title.rss_hash_result = 0; +} + +static u32 mlx5e_decompress_enhanced_cqe(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + struct mlx5_cqe64 *cqe, + int budget_rem) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cqcc, left; + u32 i; + + left = get_cqe_enhanced_num_mini_cqes(cqe); + /* Here we avoid breaking the cqe compression session in the middle + * in case budget is not sufficient to handle all of it. In this case + * we return work_done == budget_rem to give 'busy' napi indication. + */ + if (unlikely(left > budget_rem)) + return budget_rem; + + cqcc = wq->cc; + cqd->mini_arr_idx = 0; + memcpy(cqd->mini_arr, cqe, sizeof(struct mlx5_cqe64)); + for (i = 0; i < left; i++, cqd->mini_arr_idx++, cqcc++) { + mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo, + rq, &cqd->title); + } + wq->cc = cqcc; + rq->stats->cqe_compress_pkts += left; + + return left; +} + +static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + int update_owner_only, + int budget_rem) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cqcc = wq->cc + update_owner_only; + u32 cqe_count; + u32 i; + + cqe_count = min_t(u32, cqd->left, budget_rem); + + for (i = update_owner_only; i < cqe_count; + i++, cqd->mini_arr_idx++, cqcc++) { + if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) + mlx5e_read_mini_arr_slot(wq, cqd, cqcc); + + mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe, + rq, &cqd->title); + } + mlx5e_cqes_update_owner(wq, cqcc - wq->cc); + wq->cc = cqcc; + cqd->left -= cqe_count; + rq->stats->cqe_compress_pkts += cqe_count; + + return cqe_count; +} + +static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, + struct mlx5_cqwq *wq, + int budget_rem) +{ + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cc = wq->cc; + + mlx5e_read_title_slot(rq, wq, cc); + mlx5e_read_mini_arr_slot(wq, cqd, cc + 1); + mlx5e_decompress_cqe(rq, wq, cc); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe, + rq, &cqd->title); + cqd->mini_arr_idx++; + + return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem); +} + +#define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) + +static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq, + struct mlx5e_frag_page *frag_page) +{ + struct page *page; + + page = page_pool_dev_alloc_pages(rq->page_pool); + if (unlikely(!page)) + return -ENOMEM; + + page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); + + *frag_page = (struct mlx5e_frag_page) { + .page = page, + .frags = 0, + }; + + return 0; +} + +static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq, + struct mlx5e_frag_page *frag_page) +{ + u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; + struct page *page = frag_page->page; + + if (page_pool_defrag_page(page, drain_count) == 0) + page_pool_put_defragged_page(rq->page_pool, page, -1, true); +} + +static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *frag) +{ + int err = 0; + + if (!frag->offset) + /* On first frag (offset == 0), replenish page. + * Other frags that point to the same page (with a different + * offset) should just use the new one without replenishing again + * by themselves. + */ + err = mlx5e_page_alloc_fragmented(rq, frag->frag_page); + + return err; +} + +static bool mlx5e_frag_can_release(struct mlx5e_wqe_frag_info *frag) +{ +#define CAN_RELEASE_MASK \ + (BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) | BIT(MLX5E_WQE_FRAG_SKIP_RELEASE)) + +#define CAN_RELEASE_VALUE BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) + + return (frag->flags & CAN_RELEASE_MASK) == CAN_RELEASE_VALUE; +} + +static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *frag) +{ + if (mlx5e_frag_can_release(frag)) + mlx5e_page_release_fragmented(rq, frag->frag_page); +} + +static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) +{ + return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags]; +} + +static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, + u16 ix) +{ + struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix); + int err; + int i; + + for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) { + dma_addr_t addr; + u16 headroom; + + err = mlx5e_get_rx_frag(rq, frag); + if (unlikely(err)) + goto free_frags; + + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + + headroom = i == 0 ? rq->buff.headroom : 0; + addr = page_pool_get_dma_addr(frag->frag_page->page); + wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); + } + + return 0; + +free_frags: + while (--i >= 0) + mlx5e_put_rx_frag(rq, --frag); + + return err; +} + +static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi) +{ + int i; + + for (i = 0; i < rq->wqe.info.num_frags; i++, wi++) + mlx5e_put_rx_frag(rq, wi); +} + +static void mlx5e_xsk_free_rx_wqe(struct mlx5e_wqe_frag_info *wi) +{ + if (!(wi->flags & BIT(MLX5E_WQE_FRAG_SKIP_RELEASE))) + xsk_buff_free(*wi->xskp); +} + +static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); + + if (rq->xsk_pool) { + mlx5e_xsk_free_rx_wqe(wi); + } else { + mlx5e_free_rx_wqe(rq, wi); + + /* Avoid a second release of the wqe pages: dealloc is called + * for the same missing wqes on regular RQ flush and on regular + * RQ close. This happens when XSK RQs come into play. + */ + for (int i = 0; i < rq->wqe.info.num_frags; i++, wi++) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } +} + +static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *wi; + + wi = get_frag(rq, j); + /* The page is always put into the Reuse Ring, because there + * is no way to return the page to the userspace when the + * interface goes down. + */ + mlx5e_xsk_free_rx_wqe(wi); + } +} + +static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *wi; + + wi = get_frag(rq, j); + mlx5e_free_rx_wqe(rq, wi); + } +} + +static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_rx_wqe_cyc *wqe; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + + if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j))) + break; + } + + return i; +} + +static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + int remaining = wqe_bulk; + int total_alloc = 0; + int refill_alloc; + int refill; + + /* The WQE bulk is split into smaller bulks that are sized + * according to the page pool cache refill size to avoid overflowing + * the page pool cache due to too many page releases at once. + */ + do { + refill = min_t(u16, rq->wqe.info.refill_unit, remaining); + + mlx5e_free_rx_wqes(rq, ix + total_alloc, refill); + refill_alloc = mlx5e_alloc_rx_wqes(rq, ix + total_alloc, refill); + if (unlikely(refill_alloc != refill)) + goto err_free; + + total_alloc += refill_alloc; + remaining -= refill; + } while (remaining); + + return total_alloc; + +err_free: + mlx5e_free_rx_wqes(rq, ix, total_alloc + refill_alloc); + + for (int i = 0; i < total_alloc + refill; i++) { + int j = mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + + frag = get_frag(rq, j); + for (int k = 0; k < rq->wqe.info.num_frags; k++, frag++) + frag->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } + + return 0; +} + +static void +mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinfo, + struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page, + u32 frag_offset, u32 len) +{ + skb_frag_t *frag; + + dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + + dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); + if (!xdp_buff_has_frags(xdp)) { + /* Init on the first fragment to avoid cold cache access + * when possible. + */ + sinfo->nr_frags = 0; + sinfo->xdp_frags_size = 0; + xdp_buff_set_frags_flag(xdp); + } + + frag = &sinfo->frags[sinfo->nr_frags++]; + skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); + + if (page_is_pfmemalloc(frag_page->page)) + xdp_buff_set_frag_pfmemalloc(xdp); + sinfo->xdp_frags_size += len; +} + +static inline void +mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, + struct page *page, u32 frag_offset, u32 len, + unsigned int truesize) +{ + dma_addr_t addr = page_pool_get_dma_addr(page); + + dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, + rq->buff.map_dir); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + page, frag_offset, len, truesize); +} + +static inline void +mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, + struct page *page, dma_addr_t addr, + int offset_from, int dma_offset, u32 headlen) +{ + const void *from = page_address(page) + offset_from; + /* Aligning len to sizeof(long) optimizes memcpy performance */ + unsigned int len = ALIGN(headlen, sizeof(long)); + + dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len, + rq->buff.map_dir); + skb_copy_to_linear_data(skb, from, len); +} + +static void +mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) +{ + bool no_xdp_xmit; + int i; + + /* A common case for AF_XDP. */ + if (bitmap_full(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe)) + return; + + no_xdp_xmit = bitmap_empty(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + + if (rq->xsk_pool) { + struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs; + + /* The page is always put into the Reuse Ring, because there + * is no way to return the page to userspace when the interface + * goes down. + */ + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) + if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) + xsk_buff_free(xsk_buffs[i]); + } else { + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) { + if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) { + struct mlx5e_frag_page *frag_page; + + frag_page = &wi->alloc_units.frag_pages[i]; + mlx5e_page_release_fragmented(rq, frag_page); + } + } + } +} + +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) +{ + struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + + do { + u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head); + + mlx5_wq_ll_push(wq, next_wqe_index); + } while (--n); + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); +} + +/* This function returns the size of the continuous free space inside a bitmap + * that starts from first and no longer than len including circular ones. + */ +static int bitmap_find_window(unsigned long *bitmap, int len, + int bitmap_size, int first) +{ + int next_one, count; + + next_one = find_next_bit(bitmap, bitmap_size, first); + if (next_one == bitmap_size) { + if (bitmap_size - first >= len) + return len; + next_one = find_next_bit(bitmap, bitmap_size, 0); + count = next_one + bitmap_size - first; + } else { + count = next_one - first; + } + + return min(len, count); +} + +static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, + __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs) +{ + memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms)); + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + umr_wqe->ctrl.umr_mkey = key; + umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) + | MLX5E_KLM_UMR_DS_CNT(klm_len)); + umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len); + umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); +} + +static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + u16 klm_entries, u16 index) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 entries, pi, header_offset, err, wqe_bbs, new_entries; + u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; + u16 page_index = shampo->curr_page_index; + struct mlx5e_frag_page *frag_page; + u64 addr = shampo->last_addr; + struct mlx5e_dma_info *dma_info; + struct mlx5e_umr_wqe *umr_wqe; + int headroom, i; + + headroom = rq->buff.headroom; + new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1)); + entries = ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); + wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries); + pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs); + umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); + + frag_page = &shampo->pages[page_index]; + + for (i = 0; i < entries; i++, index++) { + dma_info = &shampo->info[index]; + if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < + MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT)) + goto update_klm; + header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << + MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + if (!(header_offset & (PAGE_SIZE - 1))) { + page_index = (page_index + 1) & (shampo->hd_per_wq - 1); + frag_page = &shampo->pages[page_index]; + + err = mlx5e_page_alloc_fragmented(rq, frag_page); + if (unlikely(err)) + goto err_unmap; + + addr = page_pool_get_dma_addr(frag_page->page); + + dma_info->addr = addr; + dma_info->frag_page = frag_page; + } else { + dma_info->addr = addr + header_offset; + dma_info->frag_page = frag_page; + } + +update_klm: + umr_wqe->inline_klms[i].bcount = + cpu_to_be32(MLX5E_RX_MAX_HEAD); + umr_wqe->inline_klms[i].key = cpu_to_be32(lkey); + umr_wqe->inline_klms[i].va = + cpu_to_be64(dma_info->addr + headroom); + } + + sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR, + .num_wqebbs = wqe_bbs, + .shampo.len = new_entries, + }; + + shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1); + shampo->curr_page_index = page_index; + shampo->last_addr = addr; + sq->pc += wqe_bbs; + sq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_unmap: + while (--i >= 0) { + dma_info = &shampo->info[--index]; + if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { + dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); + mlx5e_page_release_fragmented(rq, dma_info->frag_page); + } + } + rq->stats->buff_alloc_err++; + return err; +} + +static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 klm_entries, num_wqe, index, entries_before; + struct mlx5e_icosq *sq = rq->icosq; + int i, err, max_klm_entries, len; + + max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev); + klm_entries = bitmap_find_window(shampo->bitmap, + shampo->hd_per_wqe, + shampo->hd_per_wq, shampo->pi); + if (!klm_entries) + return 0; + + klm_entries += (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1)); + index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); + entries_before = shampo->hd_per_wq - index; + + if (unlikely(entries_before < klm_entries)) + num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) + + DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries); + else + num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries); + + for (i = 0; i < num_wqe; i++) { + len = (klm_entries > max_klm_entries) ? max_klm_entries : + klm_entries; + if (unlikely(index + len > shampo->hd_per_wq)) + len = shampo->hd_per_wq - index; + err = mlx5e_build_shampo_hd_umr(rq, sq, len, index); + if (unlikely(err)) + return err; + index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1); + klm_entries -= len; + } + + return 0; +} + +static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + struct mlx5e_icosq *sq = rq->icosq; + struct mlx5e_frag_page *frag_page; + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_umr_wqe *umr_wqe; + u32 offset; /* 17-bit value with MTT. */ + u16 pi; + int err; + int i; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + err = mlx5e_alloc_rx_hd_mpwqe(rq); + if (unlikely(err)) + goto err; + } + + pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs); + umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); + + frag_page = &wi->alloc_units.frag_pages[0]; + + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) { + dma_addr_t addr; + + err = mlx5e_page_alloc_fragmented(rq, frag_page); + if (unlikely(err)) + goto err_unmap; + addr = page_pool_get_dma_addr(frag_page->page); + umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(addr | MLX5_EN_WR), + }; + } + + /* Pad if needed, in case the value set to ucseg->xlt_octowords + * in mlx5e_build_umr_wqe() needed alignment. + */ + if (rq->mpwqe.pages_per_wqe & (MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT - 1)) { + int pad = ALIGN(rq->mpwqe.pages_per_wqe, MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT) - + rq->mpwqe.pages_per_wqe; + + memset(&umr_wqe->inline_mtts[rq->mpwqe.pages_per_wqe], 0, + sizeof(*umr_wqe->inline_mtts) * pad); + } + + bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + wi->consumed_strides = 0; + + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + + offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + + sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, + .num_wqebbs = rq->mpwqe.umr_wqebbs, + .umr.rq = rq, + }; + + sq->pc += rq->mpwqe.umr_wqebbs; + + sq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_unmap: + while (--i >= 0) { + frag_page--; + mlx5e_page_release_fragmented(rq, frag_page); + } + + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + +err: + rq->stats->buff_alloc_err++; + + return err; +} + +/* This function is responsible to dealloc SHAMPO header buffer. + * close == true specifies that we are in the middle of closing RQ operation so + * we go over all the entries and if they are not in use we free them, + * otherwise we only go over a specific range inside the header buffer that are + * not in use. + */ +void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + struct mlx5e_frag_page *deleted_page = NULL; + int hd_per_wq = shampo->hd_per_wq; + struct mlx5e_dma_info *hd_info; + int i, index = start; + + for (i = 0; i < len; i++, index++) { + if (index == hd_per_wq) + index = 0; + + if (close && !test_bit(index, shampo->bitmap)) + continue; + + hd_info = &shampo->info[index]; + hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); + if (hd_info->frag_page && hd_info->frag_page != deleted_page) { + deleted_page = hd_info->frag_page; + mlx5e_page_release_fragmented(rq, hd_info->frag_page); + } + + hd_info->frag_page = NULL; + } + + if (start + len > hd_per_wq) { + len -= hd_per_wq - start; + bitmap_clear(shampo->bitmap, start, hd_per_wq - start); + start = 0; + } + + bitmap_clear(shampo->bitmap, start, len); +} + +static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + /* This function is called on rq/netdev close. */ + mlx5e_free_rx_mpwqe(rq, wi); + + /* Avoid a second release of the wqe pages: dealloc is called also + * for missing wqes on an already flushed RQ. + */ + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); +} + +INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int wqe_bulk, count; + bool busy = false; + u16 head; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return false; + + if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk) + return false; + + if (rq->page_pool) + page_pool_nid_changed(rq->page_pool, numa_mem_id()); + + wqe_bulk = mlx5_wq_cyc_missing(wq); + head = mlx5_wq_cyc_get_head(wq); + + /* Don't allow any newly allocated WQEs to share the same page with old + * WQEs that aren't completed yet. Stop earlier. + */ + wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; + + if (!rq->xsk_pool) { + count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk); + } else if (likely(!rq->xsk_pool->dma_need_sync)) { + mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); + count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); + } else { + mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); + /* If dma_need_sync is true, it's more efficient to call + * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch, + * because the latter does the same check and returns only one + * frame. + */ + count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk); + } + + mlx5_wq_cyc_push_n(wq, count); + if (unlikely(count != wqe_bulk)) { + rq->stats->buff_alloc_err++; + busy = true; + } + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_cyc_update_db_record(wq); + + return busy; +} + +void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) +{ + u16 sqcc; + + sqcc = sq->cc; + + while (sqcc != sq->pc) { + struct mlx5e_icosq_wqe_info *wi; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + sqcc += wi->num_wqebbs; +#ifdef CONFIG_MLX5_EN_TLS + switch (wi->wqe_type) { + case MLX5E_ICOSQ_WQE_SET_PSV_TLS: + mlx5e_ktls_handle_ctx_completion(wi); + break; + case MLX5E_ICOSQ_WQE_GET_PSV_TLS: + mlx5e_ktls_handle_get_psv_completion(wi, sq); + break; + } +#endif + } + sq->cc = sqcc; +} + +static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, + struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); + struct mlx5e_shampo_hd *shampo; + /* assume 1:1 relationship between RQ and icosq */ + struct mlx5e_rq *rq = &c->rq; + int end, from, len = umr.len; + + shampo = rq->mpwqe.shampo; + end = shampo->hd_per_wq; + from = shampo->ci; + if (from + len > shampo->hd_per_wq) { + len -= end - from; + bitmap_set(shampo->bitmap, from, end - from); + from = 0; + } + + bitmap_set(shampo->bitmap, from, len); + shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1); +} + +int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; + u16 sqcc; + int i; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return 0; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (likely(!cqe)) + return 0; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + i = 0; + do { + u16 wqe_counter; + bool last_wqe; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_icosq_wqe_info *wi; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + sqcc += wi->num_wqebbs; + + if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + netdev_WARN_ONCE(cq->netdev, + "Bad OP in ICOSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + mlx5e_dump_error_cqe(&sq->cq, sq->sqn, + (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + queue_work(cq->priv->wq, &sq->recover_work); + break; + } + + switch (wi->wqe_type) { + case MLX5E_ICOSQ_WQE_UMR_RX: + wi->umr.rq->mpwqe.umr_completed++; + break; + case MLX5E_ICOSQ_WQE_NOP: + break; + case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR: + mlx5e_handle_shampo_hd_umr(wi->shampo, sq); + break; +#ifdef CONFIG_MLX5_EN_TLS + case MLX5E_ICOSQ_WQE_UMR_TLS: + break; + case MLX5E_ICOSQ_WQE_SET_PSV_TLS: + mlx5e_ktls_handle_ctx_completion(wi); + break; + case MLX5E_ICOSQ_WQE_GET_PSV_TLS: + mlx5e_ktls_handle_get_psv_completion(wi, sq); + break; +#endif + default: + netdev_WARN_ONCE(cq->netdev, + "Bad WQE type in ICOSQ WQE info: 0x%x\n", + wi->wqe_type); + } + } while (!last_wqe); + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + sq->cc = sqcc; + + mlx5_cqwq_update_db_record(&cq->wq); + + return i; +} + +INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + u8 umr_completed = rq->mpwqe.umr_completed; + struct mlx5e_icosq *sq = rq->icosq; + int alloc_err = 0; + u8 missing, i; + u16 head; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return false; + + if (umr_completed) { + mlx5e_post_rx_mpwqe(rq, umr_completed); + rq->mpwqe.umr_in_progress -= umr_completed; + rq->mpwqe.umr_completed = 0; + } + + missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress; + + if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk)) + rq->stats->congst_umr++; + + if (likely(missing < rq->mpwqe.min_wqe_bulk)) + return false; + + if (rq->page_pool) + page_pool_nid_changed(rq->page_pool, numa_mem_id()); + + head = rq->mpwqe.actual_wq_head; + i = missing; + do { + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, head); + + /* Deferred free for better page pool cache usage. */ + mlx5e_free_rx_mpwqe(rq, wi); + + alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) : + mlx5e_alloc_rx_mpwqe(rq, head); + + if (unlikely(alloc_err)) + break; + head = mlx5_wq_ll_get_wqe_next_ix(wq, head); + } while (--i); + + rq->mpwqe.umr_last_bulk = missing - i; + if (sq->doorbell_cseg) { + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg); + sq->doorbell_cseg = NULL; + } + + rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; + rq->mpwqe.actual_wq_head = head; + + /* If XSK Fill Ring doesn't have enough frames, report the error, so + * that one of the actions can be performed: + * 1. If need_wakeup is used, signal that the application has to kick + * the driver when it refills the Fill Ring. + * 2. Otherwise, busy poll by rescheduling the NAPI poll. + */ + if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool)) + return true; + + return false; +} + +static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) +{ + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); + + tcp->check = 0; + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro.ack_seq_num; + tcp->window = cqe->lro.tcp_win; + } +} + +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) +{ + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct tcphdr *tcp; + int network_depth = 0; + __wsum check; + __be16 proto; + u16 tot_len; + void *ip_p; + + proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); + + tot_len = cqe_bcnt - network_depth; + ip_p = skb->data + network_depth; + + if (proto == htons(ETH_P_IP)) { + struct iphdr *ipv4 = ip_p; + + tcp = ip_p + sizeof(struct iphdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + ipv4->ttl = cqe->lro.min_ttl; + ipv4->tot_len = cpu_to_be16(tot_len); + ipv4->check = 0; + ipv4->check = ip_fast_csum((unsigned char *)ipv4, + ipv4->ihl); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr, + tot_len - sizeof(struct iphdr), + IPPROTO_TCP, check); + } else { + u16 payload_len = tot_len - sizeof(struct ipv6hdr); + struct ipv6hdr *ipv6 = ip_p; + + tcp = ip_p + sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + + ipv6->hop_limit = cqe->lro.min_ttl; + ipv6->payload_len = cpu_to_be16(payload_len); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len, + IPPROTO_TCP, check); + } +} + +static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) +{ + struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index]; + u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom; + + return page_address(last_head->frag_page->page) + head_offset; +} + +static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) +{ + int udp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + udp_off); + uh->len = htons(skb->len - udp_off); + + if (uh->check) + uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr, + ipv4->daddr, 0); + + skb->csum_start = (unsigned char *)uh - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; +} + +static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6) +{ + int udp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + udp_off); + uh->len = htons(skb->len - udp_off); + + if (uh->check) + uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr, + &ipv6->daddr, 0); + + skb->csum_start = (unsigned char *)uh - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; +} + +static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + struct tcphdr *skb_tcp_hd) +{ + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); + struct tcphdr *last_tcp_hd; + void *last_hd_addr; + + last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index); + last_tcp_hd = last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff; + tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH); +} + +static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4, + struct mlx5_cqe64 *cqe, bool match) +{ + int tcp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct tcphdr *tcp; + + tcp = (struct tcphdr *)(skb->data + tcp_off); + if (match) + mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp); + + tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr, + ipv4->daddr, 0); + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + + skb->csum_start = (unsigned char *)tcp - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + + if (tcp->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; +} + +static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6, + struct mlx5_cqe64 *cqe, bool match) +{ + int tcp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct tcphdr *tcp; + + tcp = (struct tcphdr *)(skb->data + tcp_off); + if (match) + mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp); + + tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr, + &ipv6->daddr, 0); + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + skb->csum_start = (unsigned char *)tcp - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + + if (tcp->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; +} + +static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match) +{ + bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)); + struct sk_buff *skb = rq->hw_gro_data->skb; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + skb->ip_summed = CHECKSUM_PARTIAL; + + if (is_ipv4) { + int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr); + struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff); + __be16 newlen = htons(skb->len - nhoff); + + csum_replace2(&ipv4->check, ipv4->tot_len, newlen); + ipv4->tot_len = newlen; + + if (ipv4->protocol == IPPROTO_TCP) + mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match); + else + mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4); + } else { + int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr); + struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff); + + ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6)); + + if (ipv6->nexthdr == IPPROTO_TCP) + mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match); + else + mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6); + } +} + +static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, + struct sk_buff *skb) +{ + u8 cht = cqe->rss_hash_type; + int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 : + (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 : + PKT_HASH_TYPE_NONE; + skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); +} + +static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, + __be16 *proto) +{ + *proto = ((struct ethhdr *)skb->data)->h_proto; + *proto = __vlan_get_protocol(skb, *proto, network_depth); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; +} + +static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) +{ + int network_depth = 0; + __be16 proto; + void *ip; + int rc; + + if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto))) + return; + + ip = skb->data + network_depth; + rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) : + IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip)); + + rq->stats->ecn_mark += !!rc; +} + +static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) +{ + void *ip_p = skb->data + network_depth; + + return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : + ((struct ipv6hdr *)ip_p)->nexthdr; +} + +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + +#define MAX_PADDING 8 + +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); +} + +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) +{ + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; + + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + /* Fixup vlan headers, if any */ + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add + * the checksum manually. + */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); + + /* Fixup tail padding, if any */ + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); +} + +static inline void mlx5e_handle_csum(struct net_device *netdev, + struct mlx5_cqe64 *cqe, + struct mlx5e_rq *rq, + struct sk_buff *skb, + bool lro) +{ + struct mlx5e_rq_stats *stats = rq->stats; + int network_depth = 0; + __be16 proto; + + if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) + goto csum_none; + + if (lro) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + stats->csum_unnecessary++; + return; + } + + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) || + get_cqe_tls_offload(cqe)) + goto csum_unnecessary; + + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to be + * CHECKSUM_UNNECESSARY even if they are not padded. + */ + if (short_frame(skb->len)) + goto csum_unnecessary; + + if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) + goto csum_unnecessary; + + stats->csum_complete++; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + + if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state)) + return; /* CQE csum covers all received bytes */ + + /* csum might need some fixups ...*/ + mlx5e_skb_csum_fixup(skb, network_depth, proto, stats); + return; + } + +csum_unnecessary: + if (likely((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe_is_tunneled(cqe)) { + skb->csum_level = 1; + skb->encapsulation = 1; + stats->csum_unnecessary_inner++; + return; + } + stats->csum_unnecessary++; + return; + } +csum_none: + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; +} + +#define MLX5E_CE_BIT_MASK 0x80 + +static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct mlx5e_rq *rq, + struct sk_buff *skb) +{ + u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; + struct mlx5e_rq_stats *stats = rq->stats; + struct net_device *netdev = rq->netdev; + + skb->mac_len = ETH_HLEN; + + if (unlikely(get_cqe_tls_offload(cqe))) + mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt); + + if (unlikely(mlx5_ipsec_is_rx_flow(cqe))) + mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, + be32_to_cpu(cqe->ft_metadata)); + + if (unlikely(mlx5e_macsec_is_rx_flow(cqe))) + mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe); + + if (lro_num_seg > 1) { + mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); + skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + /* Subtract one since we already counted this as one + * "regular" packet in mlx5e_complete_rx_cqe() + */ + stats->packets += lro_num_seg - 1; + stats->lro_packets++; + stats->lro_bytes += cqe_bcnt; + } + + if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time, + rq->clock, get_cqe_ts(cqe)); + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + if (cqe_has_vlan(cqe)) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(cqe->vlan_info)); + stats->removed_vlan_packets++; + } + + skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; + + mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + /* checking CE bit in cqe - MSB in ml_path field */ + if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK)) + mlx5e_enable_ecn(rq, skb); + + skb->protocol = eth_type_trans(skb, netdev); + + if (unlikely(mlx5e_skb_is_multicast(skb))) + stats->mcast_packets++; +} + +static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct mlx5e_rq_stats *stats = rq->stats; + + stats->packets++; + stats->gro_packets++; + stats->bytes += cqe_bcnt; + stats->gro_bytes += cqe_bcnt; + if (NAPI_GRO_CB(skb)->count != 1) + return; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + skb_reset_network_header(skb); + if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) { + napi_gro_receive(rq->cq.napi, skb); + rq->hw_gro_data->skb = NULL; + } +} + +static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct mlx5e_rq_stats *stats = rq->stats; + + stats->packets++; + stats->bytes += cqe_bcnt; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); +} + +static inline +struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, + u32 frag_size, u16 headroom, + u32 cqe_bcnt, u32 metasize) +{ + struct sk_buff *skb = napi_build_skb(va, frag_size); + + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_reserve(skb, headroom); + skb_put(skb, cqe_bcnt); + + if (metasize) + skb_metadata_set(skb, metasize); + + return skb; +} + +static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + void *va, u16 headroom, u32 frame_sz, u32 len, + struct mlx5e_xdp_buff *mxbuf) +{ + xdp_init_buff(&mxbuf->xdp, frame_sz, &rq->xdp_rxq); + xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true); + mxbuf->cqe = cqe; + mxbuf->rq = rq; +} + +static struct sk_buff * +mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + struct mlx5_cqe64 *cqe, u32 cqe_bcnt) +{ + struct mlx5e_frag_page *frag_page = wi->frag_page; + u16 rx_headroom = rq->buff.headroom; + struct bpf_prog *prog; + struct sk_buff *skb; + u32 metasize = 0; + void *va, *data; + dma_addr_t addr; + u32 frag_size; + + va = page_address(frag_page->page) + wi->offset; + data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + + addr = page_pool_get_dma_addr(frag_page->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, + frag_size, rq->buff.map_dir); + net_prefetch(data); + + prog = rcu_dereference(rq->xdp_prog); + if (prog) { + struct mlx5e_xdp_buff mxbuf; + + net_prefetchw(va); /* xdp_frame data area */ + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + cqe_bcnt, &mxbuf); + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) + return NULL; /* page/packet was consumed by XDP */ + + rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; + metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; + cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; + } + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); + if (unlikely(!skb)) + return NULL; + + /* queue up for recycling/reuse */ + skb_mark_for_recycle(skb); + frag_page->frags++; + + return skb; +} + +static struct sk_buff * +mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + struct mlx5_cqe64 *cqe, u32 cqe_bcnt) +{ + struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; + struct mlx5e_wqe_frag_info *head_wi = wi; + u16 rx_headroom = rq->buff.headroom; + struct mlx5e_frag_page *frag_page; + struct skb_shared_info *sinfo; + struct mlx5e_xdp_buff mxbuf; + u32 frag_consumed_bytes; + struct bpf_prog *prog; + struct sk_buff *skb; + dma_addr_t addr; + u32 truesize; + void *va; + + frag_page = wi->frag_page; + + va = page_address(frag_page->page) + wi->offset; + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); + + addr = page_pool_get_dma_addr(frag_page->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, + rq->buff.frame0_sz, rq->buff.map_dir); + net_prefetchw(va); /* xdp_frame data area */ + net_prefetch(va + rx_headroom); + + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + frag_consumed_bytes, &mxbuf); + sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); + truesize = 0; + + cqe_bcnt -= frag_consumed_bytes; + frag_info++; + wi++; + + while (cqe_bcnt) { + frag_page = wi->frag_page; + + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); + + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, + wi->offset, frag_consumed_bytes); + truesize += frag_info->frag_stride; + + cqe_bcnt -= frag_consumed_bytes; + frag_info++; + wi++; + } + + prog = rcu_dereference(rq->xdp_prog); + if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + struct mlx5e_wqe_frag_info *pwi; + + for (pwi = head_wi; pwi < wi; pwi++) + pwi->frag_page->frags++; + } + return NULL; /* page/packet was consumed by XDP */ + } + + skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, rq->buff.frame0_sz, + mxbuf.xdp.data - mxbuf.xdp.data_hard_start, + mxbuf.xdp.data_end - mxbuf.xdp.data, + mxbuf.xdp.data - mxbuf.xdp.data_meta); + if (unlikely(!skb)) + return NULL; + + skb_mark_for_recycle(skb); + head_wi->frag_page->frags++; + + if (xdp_buff_has_frags(&mxbuf.xdp)) { + /* sinfo->nr_frags is reset by build_skb, calculate again. */ + xdp_update_skb_shared_info(skb, wi - head_wi - 1, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + + for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) + pwi->frag_page->frags++; + } + + return skb; +} + +static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; + struct mlx5e_priv *priv = rq->priv; + + if (cqe_syndrome_needs_recover(err_cqe->syndrome) && + !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) { + mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe); + queue_work(priv->wq, &rq->recover_work); + } +} + +static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + trigger_report(rq, cqe); + rq->stats->wqe_err++; +} + +static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto wq_cyc_pop; + } + + skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + mlx5e_xsk_skb_from_cqe_linear, + rq, wi, cqe, cqe_bcnt); + if (!skb) { + /* probably for XDP */ + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + wi->frag_page->frags++; + goto wq_cyc_pop; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (mlx5e_cqe_regb_chain(cqe)) + if (!mlx5e_tc_update_skb_nic(cqe, skb)) { + dev_kfree_skb_any(skb); + goto wq_cyc_pop; + } + + napi_gro_receive(rq->cq.napi, skb); + +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} + +#ifdef CONFIG_MLX5_ESWITCH +static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct net_device *netdev = rq->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto wq_cyc_pop; + } + + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, wi, cqe, cqe_bcnt); + if (!skb) { + /* probably for XDP */ + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + wi->frag_page->frags++; + goto wq_cyc_pop; + } + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (rep->vlan && skb_vlan_tag_present(skb)) + skb_vlan_pop(skb); + + mlx5e_rep_tc_receive(cqe, rq, skb); + +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} + +static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1); + u32 page_idx = wqe_offset >> rq->mpwqe.page_shift; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe, cqe_bcnt, head_offset, page_idx); + if (!skb) + goto mpwrq_cqe_out; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + mlx5e_rep_tc_receive(cqe, rq, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + +const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = { + .handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep, +}; +#endif + +static void +mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, + struct mlx5e_frag_page *frag_page, + u32 data_bcnt, u32 data_offset) +{ + net_prefetchw(skb->data); + + while (data_bcnt) { + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ + u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt); + unsigned int truesize; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + truesize = pg_consumed_bytes; + else + truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); + + frag_page->frags++; + mlx5e_add_skb_frag(rq, skb, frag_page->page, data_offset, + pg_consumed_bytes, truesize); + + data_bcnt -= pg_consumed_bytes; + data_offset = 0; + frag_page++; + } +} + +static struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, + u32 page_idx) +{ + struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; + u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); + struct mlx5e_frag_page *head_page = frag_page; + u32 frag_offset = head_offset; + u32 byte_cnt = cqe_bcnt; + struct skb_shared_info *sinfo; + struct mlx5e_xdp_buff mxbuf; + unsigned int truesize = 0; + struct bpf_prog *prog; + struct sk_buff *skb; + u32 linear_frame_sz; + u16 linear_data_len; + u16 linear_hr; + void *va; + + prog = rcu_dereference(rq->xdp_prog); + + if (prog) { + /* area for bpf_xdp_[store|load]_bytes */ + net_prefetchw(page_address(frag_page->page) + frag_offset); + if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) { + rq->stats->buff_alloc_err++; + return NULL; + } + va = page_address(wi->linear_page.page); + net_prefetchw(va); /* xdp_frame data area */ + linear_hr = XDP_PACKET_HEADROOM; + linear_data_len = 0; + linear_frame_sz = MLX5_SKB_FRAG_SZ(linear_hr + MLX5E_RX_MAX_HEAD); + } else { + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + skb_mark_for_recycle(skb); + va = skb->head; + net_prefetchw(va); /* xdp_frame data area */ + net_prefetchw(skb->data); + + frag_offset += headlen; + byte_cnt -= headlen; + linear_hr = skb_headroom(skb); + linear_data_len = headlen; + linear_frame_sz = MLX5_SKB_FRAG_SZ(skb_end_offset(skb)); + if (unlikely(frag_offset >= PAGE_SIZE)) { + frag_page++; + frag_offset -= PAGE_SIZE; + } + } + + mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf); + + sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); + + while (byte_cnt) { + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ + u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + truesize += pg_consumed_bytes; + else + truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); + + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset, + pg_consumed_bytes); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + frag_page++; + } + + if (prog) { + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + struct mlx5e_frag_page *pfp; + + for (pfp = head_page; pfp < frag_page; pfp++) + pfp->frags++; + + wi->linear_page.frags++; + } + mlx5e_page_release_fragmented(rq, &wi->linear_page); + return NULL; /* page/packet was consumed by XDP */ + } + + skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, + linear_frame_sz, + mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0, + mxbuf.xdp.data - mxbuf.xdp.data_meta); + if (unlikely(!skb)) { + mlx5e_page_release_fragmented(rq, &wi->linear_page); + return NULL; + } + + skb_mark_for_recycle(skb); + wi->linear_page.frags++; + mlx5e_page_release_fragmented(rq, &wi->linear_page); + + if (xdp_buff_has_frags(&mxbuf.xdp)) { + struct mlx5e_frag_page *pagep; + + /* sinfo->nr_frags is reset by build_skb, calculate again. */ + xdp_update_skb_shared_info(skb, frag_page - head_page, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + + pagep = head_page; + do + pagep->frags++; + while (++pagep < frag_page); + } + __pskb_pull_tail(skb, headlen); + } else { + dma_addr_t addr; + + if (xdp_buff_has_frags(&mxbuf.xdp)) { + struct mlx5e_frag_page *pagep; + + xdp_update_skb_shared_info(skb, sinfo->nr_frags, + sinfo->xdp_frags_size, truesize, + xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + + pagep = frag_page - sinfo->nr_frags; + do + pagep->frags++; + while (++pagep < frag_page); + } + /* copy header */ + addr = page_pool_get_dma_addr(head_page->page); + mlx5e_copy_skb_header(rq, skb, head_page->page, addr, + head_offset, head_offset, headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; + } + + return skb; +} + +static struct sk_buff * +mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, + u32 page_idx) +{ + struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; + u16 rx_headroom = rq->buff.headroom; + struct bpf_prog *prog; + struct sk_buff *skb; + u32 metasize = 0; + void *va, *data; + dma_addr_t addr; + u32 frag_size; + + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + va = page_address(frag_page->page) + head_offset; + data = va + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + + addr = page_pool_get_dma_addr(frag_page->page); + dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, + frag_size, rq->buff.map_dir); + net_prefetch(data); + + prog = rcu_dereference(rq->xdp_prog); + if (prog) { + struct mlx5e_xdp_buff mxbuf; + + net_prefetchw(va); /* xdp_frame data area */ + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, + cqe_bcnt, &mxbuf); + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + frag_page->frags++; + return NULL; /* page/packet was consumed by XDP */ + } + + rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; + metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; + cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; + } + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); + if (unlikely(!skb)) + return NULL; + + /* queue up for recycling/reuse */ + skb_mark_for_recycle(skb); + frag_page->frags++; + + return skb; +} + +static struct sk_buff * +mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 header_index) +{ + struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index]; + u16 head_offset = head->addr & (PAGE_SIZE - 1); + u16 head_size = cqe->shampo.header_size; + u16 rx_headroom = rq->buff.headroom; + struct sk_buff *skb = NULL; + void *hdr, *data; + u32 frag_size; + + hdr = page_address(head->frag_page->page) + head_offset; + data = hdr + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); + + if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { + /* build SKB around header */ + dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir); + prefetchw(hdr); + prefetch(data); + skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0); + + if (unlikely(!skb)) + return NULL; + + head->frag_page->frags++; + } else { + /* allocate SKB and copy header for large header */ + rq->stats->gro_large_hds++; + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(head_size, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + prefetchw(skb->data); + mlx5e_copy_skb_header(rq, skb, head->frag_page->page, head->addr, + head_offset + rx_headroom, + rx_headroom, head_size); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += head_size; + skb->len += head_size; + } + + /* queue up for recycling/reuse */ + skb_mark_for_recycle(skb); + + return skb; +} + +static void +mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz) +{ + skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1]; + unsigned int frag_size = skb_frag_size(last_frag); + unsigned int frag_truesize; + + frag_truesize = ALIGN(frag_size, BIT(log_stride_sz)); + skb->truesize += frag_truesize - frag_size; +} + +static void +mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match) +{ + struct sk_buff *skb = rq->hw_gro_data->skb; + struct mlx5e_rq_stats *stats = rq->stats; + + stats->gro_skbs++; + if (likely(skb_shinfo(skb)->nr_frags)) + mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz); + if (NAPI_GRO_CB(skb)->count > 1) + mlx5e_shampo_update_hdr(rq, cqe, match); + napi_gro_receive(rq->cq.napi, skb); + rq->hw_gro_data->skb = NULL; +} + +static bool +mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) +{ + int nr_frags = skb_shinfo(skb)->nr_frags; + + return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; +} + +static void +mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u64 addr = shampo->info[header_index].addr; + + if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { + struct mlx5e_dma_info *dma_info = &shampo->info[header_index]; + + dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE); + mlx5e_page_release_fragmented(rq, dma_info->frag_page); + } + bitmap_clear(shampo->bitmap, header_index, 1); +} + +static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size; + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); + u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset); + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u32 data_offset = wqe_offset & (PAGE_SIZE - 1); + u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + u16 head_size = cqe->shampo.header_size; + struct sk_buff **skb = &rq->hw_gro_data->skb; + bool flush = cqe->shampo.flush; + bool match = cqe->shampo.match; + struct mlx5e_rq_stats *stats = rq->stats; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5e_mpw_info *wi; + struct mlx5_wq_ll *wq; + + wi = mlx5e_get_mpw_info(rq, wqe_id); + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + stats->gro_match_packets += match; + + if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) { + match = false; + mlx5e_shampo_flush_skb(rq, cqe, match); + } + + if (!*skb) { + if (likely(head_size)) + *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + else + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, cqe_bcnt, + data_offset, page_idx); + if (unlikely(!*skb)) + goto free_hd_entry; + + NAPI_GRO_CB(*skb)->count = 1; + skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size; + } else { + NAPI_GRO_CB(*skb)->count++; + if (NAPI_GRO_CB(*skb)->count == 2 && + rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) { + void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index); + int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff - + sizeof(struct iphdr); + struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff); + + rq->hw_gro_data->second_ip_id = ntohs(iph->id); + } + } + + if (likely(head_size)) { + struct mlx5e_frag_page *frag_page; + + frag_page = &wi->alloc_units.frag_pages[page_idx]; + mlx5e_fill_skb_data(*skb, rq, frag_page, data_bcnt, data_offset); + } + + mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); + if (flush) + mlx5e_shampo_flush_skb(rq, cqe, match); +free_hd_entry: + mlx5e_free_rx_shampo_hd_entry(rq, header_index); +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + +static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; + u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1); + u32 page_idx = wqe_offset >> rq->mpwqe.page_shift; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5_wq_ll *wq; + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + mlx5e_handle_rx_err_cqe(rq, cqe); + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + struct mlx5e_rq_stats *stats = rq->stats; + + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + mlx5e_xsk_skb_from_cqe_mpwrq_linear, + rq, wi, cqe, cqe_bcnt, head_offset, + page_idx); + if (!skb) + goto mpwrq_cqe_out; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + + if (mlx5e_cqe_regb_chain(cqe)) + if (!mlx5e_tc_update_skb_nic(cqe, skb)) { + dev_kfree_skb_any(skb); + goto mpwrq_cqe_out; + } + + napi_gro_receive(rq->cq.napi, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + +static int mlx5e_rx_cq_process_enhanced_cqe_comp(struct mlx5e_rq *rq, + struct mlx5_cqwq *cqwq, + int budget_rem) +{ + struct mlx5_cqe64 *cqe, *title_cqe = NULL; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + int work_done = 0; + + cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq); + if (!cqe) + return work_done; + + if (cqd->last_cqe_title && + (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED)) { + rq->stats->cqe_compress_blks++; + cqd->last_cqe_title = false; + } + + do { + if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { + if (title_cqe) { + mlx5e_read_enhanced_title_slot(rq, title_cqe); + title_cqe = NULL; + rq->stats->cqe_compress_blks++; + } + work_done += + mlx5e_decompress_enhanced_cqe(rq, cqwq, cqe, + budget_rem - work_done); + continue; + } + title_cqe = cqe; + mlx5_cqwq_pop(cqwq); + + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo, + rq, cqe); + work_done++; + } while (work_done < budget_rem && + (cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq))); + + /* last cqe might be title on next poll bulk */ + if (title_cqe) { + mlx5e_read_enhanced_title_slot(rq, title_cqe); + cqd->last_cqe_title = true; + } + + return work_done; +} + +static int mlx5e_rx_cq_process_basic_cqe_comp(struct mlx5e_rq *rq, + struct mlx5_cqwq *cqwq, + int budget_rem) +{ + struct mlx5_cqe64 *cqe; + int work_done = 0; + + if (rq->cqd.left) + work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget_rem); + + while (work_done < budget_rem && (cqe = mlx5_cqwq_get_cqe(cqwq))) { + if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { + work_done += + mlx5e_decompress_cqes_start(rq, cqwq, + budget_rem - work_done); + continue; + } + + mlx5_cqwq_pop(cqwq); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo, + rq, cqe); + work_done++; + } + + return work_done; +} + +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) +{ + struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5_cqwq *cqwq = &cq->wq; + int work_done; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + return 0; + + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) + work_done = mlx5e_rx_cq_process_enhanced_cqe_comp(rq, cqwq, + budget); + else + work_done = mlx5e_rx_cq_process_basic_cqe_comp(rq, cqwq, + budget); + + if (work_done == 0) + return 0; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb) + mlx5e_shampo_flush_skb(rq, NULL, false); + + if (rcu_access_pointer(rq->xdp_prog)) + mlx5e_xdp_rx_poll_complete(rq); + + mlx5_cqwq_update_db_record(cqwq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + return work_done; +} + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#define MLX5_IB_GRH_SGID_OFFSET 8 +#define MLX5_IB_GRH_DGID_OFFSET 24 +#define MLX5_GID_SIZE 16 + +static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct hwtstamp_config *tstamp; + struct mlx5e_rq_stats *stats; + struct net_device *netdev; + struct mlx5e_priv *priv; + char *pseudo_header; + u32 flags_rqpn; + u32 qpn; + u8 *dgid; + u8 g; + + qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff; + netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn); + + /* No mapping present, cannot process SKB. This might happen if a child + * interface is going down while having unprocessed CQEs on parent RQ + */ + if (unlikely(!netdev)) { + /* TODO: add drop counters support */ + skb->dev = NULL; + pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn); + return; + } + + priv = mlx5i_epriv(netdev); + tstamp = &priv->tstamp; + stats = &priv->channel_stats[rq->ix]->rq; + + flags_rqpn = be32_to_cpu(cqe->flags_rqpn); + g = (flags_rqpn >> 28) & 3; + dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; + if ((!g) || dgid[0] != 0xff) + skb->pkt_type = PACKET_HOST; + else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + + /* Drop packets that this interface sent, ie multicast packets + * that the HCA has replicated. + */ + if (g && (qpn == (flags_rqpn & 0xffffff)) && + (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET, + MLX5_GID_SIZE) == 0)) { + skb->dev = NULL; + return; + } + + skb_pull(skb, MLX5_IB_GRH_BYTES); + + skb->protocol = *((__be16 *)(skb->data)); + + if (netdev->features & NETIF_F_RXCSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + stats->csum_complete++; + } else { + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; + } + + if (unlikely(mlx5e_rx_hw_stamp(tstamp))) + skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time, + rq->clock, get_cqe_ts(cqe)); + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + /* 20 bytes of ipoib header and 4 for encap existing */ + pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN); + memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN); + skb_reset_mac_header(skb); + skb_pull(skb, MLX5_IPOIB_HARD_LEN); + + skb->dev = netdev; + + stats->packets++; + stats->bytes += cqe_bcnt; +} + +static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 ci; + + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto wq_cyc_pop; + } + + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, wi, cqe, cqe_bcnt); + if (!skb) + goto wq_cyc_pop; + + mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (unlikely(!skb->dev)) { + dev_kfree_skb_any(skb); + goto wq_cyc_pop; + } + napi_gro_receive(rq->cq.napi, skb); + +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} + +const struct mlx5e_rx_handlers mlx5i_rx_handlers = { + .handle_rx_cqe = mlx5i_handle_rx_cqe, + .handle_rx_cqe_mpwqe = NULL, /* Not supported */ +}; +#endif /* CONFIG_MLX5_CORE_IPOIB */ + +int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk) +{ + struct net_device *netdev = rq->netdev; + struct mlx5_core_dev *mdev = rq->mdev; + struct mlx5e_priv *priv = rq->priv; + + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + rq->mpwqe.skb_from_cqe_mpwrq = xsk ? + mlx5e_xsk_skb_from_cqe_mpwrq_linear : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ? + mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + rq->post_wqes = mlx5e_post_rx_mpwqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo; + if (!rq->handle_rx_cqe) { + netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n"); + return -EINVAL; + } + } else { + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe; + if (!rq->handle_rx_cqe) { + netdev_err(netdev, "RX handler of MPWQE RQ is not set\n"); + return -EINVAL; + } + } + + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + rq->wqe.skb_from_cqe = xsk ? + mlx5e_xsk_skb_from_cqe_linear : + mlx5e_rx_is_linear_skb(mdev, params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; + rq->post_wqes = mlx5e_post_rx_wqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe; + if (!rq->handle_rx_cqe) { + netdev_err(netdev, "RX handler of RQ is not set\n"); + return -EINVAL; + } + } + + return 0; +} + +static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 trap_id; + u16 ci; + + trap_id = get_cqe_flow_tag(cqe); + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto wq_cyc_pop; + } + + skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt); + if (!skb) + goto wq_cyc_pop; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + skb_push(skb, ETH_HLEN); + + mlx5_devlink_trap_report(rq->mdev, trap_id, skb, + rq->netdev->devlink_port); + dev_kfree_skb_any(skb); + +wq_cyc_pop: + mlx5_wq_cyc_pop(wq); +} + +void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params) +{ + rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(rq->mdev, params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; + rq->post_wqes = mlx5e_post_rx_wqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; + rq->handle_rx_cqe = mlx5e_trap_handle_rx_cqe; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c new file mode 100644 index 0000000000..08a75654f5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "en.h" +#include "en/port.h" +#include "eswitch.h" + +static int mlx5e_test_health_info(struct mlx5e_priv *priv) +{ + struct mlx5_core_health *health = &priv->mdev->priv.health; + + return health->fatal_error ? 1 : 0; +} + +static int mlx5e_test_link_state(struct mlx5e_priv *priv) +{ + u8 port_state; + + if (!netif_carrier_ok(priv->netdev)) + return 1; + + port_state = mlx5_query_vport_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0); + return port_state == VPORT_STATE_UP ? 0 : 1; +} + +static int mlx5e_test_link_speed(struct mlx5e_priv *priv) +{ + u32 speed; + + if (!netif_carrier_ok(priv->netdev)) + return 1; + + return mlx5e_port_linkspeed(priv->mdev, &speed); +} + +struct mlx5ehdr { + __be32 version; + __be64 magic; +}; + +#ifdef CONFIG_INET +/* loopback test */ +#define MLX5E_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) +\ + sizeof(struct udphdr) + sizeof(struct mlx5ehdr)) +#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL + +static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv) +{ + struct sk_buff *skb = NULL; + struct mlx5ehdr *mlxh; + struct ethhdr *ethh; + struct udphdr *udph; + struct iphdr *iph; + int iplen; + + skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE); + if (!skb) { + netdev_err(priv->netdev, "\tFailed to alloc loopback skb\n"); + return NULL; + } + + net_prefetchw(skb->data); + skb_reserve(skb, NET_IP_ALIGN); + + /* Reserve for ethernet and IP header */ + ethh = skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + + skb_set_network_header(skb, skb->len); + iph = skb_put(skb, sizeof(struct iphdr)); + + skb_set_transport_header(skb, skb->len); + udph = skb_put(skb, sizeof(struct udphdr)); + + /* Fill ETH header */ + ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr); + eth_zero_addr(ethh->h_source); + ethh->h_proto = htons(ETH_P_IP); + + /* Fill UDP header */ + udph->source = htons(9); + udph->dest = htons(9); /* Discard Protocol */ + udph->len = htons(sizeof(struct mlx5ehdr) + sizeof(struct udphdr)); + udph->check = 0; + + /* Fill IP header */ + iph->ihl = 5; + iph->ttl = 32; + iph->version = 4; + iph->protocol = IPPROTO_UDP; + iplen = sizeof(struct iphdr) + sizeof(struct udphdr) + + sizeof(struct mlx5ehdr); + iph->tot_len = htons(iplen); + iph->frag_off = 0; + iph->saddr = 0; + iph->daddr = 0; + iph->tos = 0; + iph->id = 0; + ip_send_check(iph); + + /* Fill test header and data */ + mlxh = skb_put(skb, sizeof(*mlxh)); + mlxh->version = 0; + mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC); + + skb->csum = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + udp4_hwcsum(skb, iph->saddr, iph->daddr); + + skb->protocol = htons(ETH_P_IP); + skb->pkt_type = PACKET_HOST; + skb->dev = priv->netdev; + + return skb; +} + +struct mlx5e_lbt_priv { + struct packet_type pt; + struct completion comp; + bool loopback_ok; + bool local_lb; +}; + +static int +mlx5e_test_loopback_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct mlx5e_lbt_priv *lbtp = pt->af_packet_priv; + struct mlx5ehdr *mlxh; + struct ethhdr *ethh; + struct udphdr *udph; + struct iphdr *iph; + + /* We are only going to peek, no need to clone the SKB */ + if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) + goto out; + + ethh = (struct ethhdr *)skb_mac_header(skb); + if (!ether_addr_equal(ethh->h_dest, orig_ndev->dev_addr)) + goto out; + + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_UDP) + goto out; + + /* Don't assume skb_transport_header() was set */ + udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl); + if (udph->dest != htons(9)) + goto out; + + mlxh = (struct mlx5ehdr *)((char *)udph + sizeof(*udph)); + if (mlxh->magic != cpu_to_be64(MLX5E_TEST_MAGIC)) + goto out; /* so close ! */ + + /* bingo */ + lbtp->loopback_ok = true; + complete(&lbtp->comp); +out: + kfree_skb(skb); + return 0; +} + +static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, + struct mlx5e_lbt_priv *lbtp) +{ + int err = 0; + + /* Temporarily enable local_lb */ + err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb); + if (err) + return err; + + if (!lbtp->local_lb) { + err = mlx5_nic_vport_update_local_lb(priv->mdev, true); + if (err) + return err; + } + + err = mlx5e_refresh_tirs(priv, true, false); + if (err) + goto out; + + lbtp->loopback_ok = false; + init_completion(&lbtp->comp); + + lbtp->pt.type = htons(ETH_P_IP); + lbtp->pt.func = mlx5e_test_loopback_validate; + lbtp->pt.dev = priv->netdev; + lbtp->pt.af_packet_priv = lbtp; + dev_add_pack(&lbtp->pt); + + return 0; + +out: + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); + + return err; +} + +static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, + struct mlx5e_lbt_priv *lbtp) +{ + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); + + dev_remove_pack(&lbtp->pt); + mlx5e_refresh_tirs(priv, false, false); +} + +static int mlx5e_cond_loopback(struct mlx5e_priv *priv) +{ + if (is_mdev_switchdev_mode(priv->mdev)) + return -EOPNOTSUPP; + + return 0; +} + +#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200)) +static int mlx5e_test_loopback(struct mlx5e_priv *priv) +{ + struct mlx5e_lbt_priv *lbtp; + struct sk_buff *skb = NULL; + int err; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + netdev_err(priv->netdev, + "\tCan't perform loopback test while device is down\n"); + return -ENODEV; + } + + lbtp = kzalloc(sizeof(*lbtp), GFP_KERNEL); + if (!lbtp) + return -ENOMEM; + lbtp->loopback_ok = false; + + err = mlx5e_test_loopback_setup(priv, lbtp); + if (err) + goto out; + + skb = mlx5e_test_get_udp_skb(priv); + if (!skb) { + err = -ENOMEM; + goto cleanup; + } + + skb_set_queue_mapping(skb, 0); + err = dev_queue_xmit(skb); + if (err) { + netdev_err(priv->netdev, + "\tFailed to xmit loopback packet err(%d)\n", + err); + goto cleanup; + } + + wait_for_completion_timeout(&lbtp->comp, MLX5E_LB_VERIFY_TIMEOUT); + err = !lbtp->loopback_ok; + +cleanup: + mlx5e_test_loopback_cleanup(priv, lbtp); +out: + kfree(lbtp); + return err; +} +#endif + +typedef int (*mlx5e_st_func)(struct mlx5e_priv *); + +struct mlx5e_st { + char name[ETH_GSTRING_LEN]; + mlx5e_st_func st_func; + mlx5e_st_func cond_func; +}; + +static struct mlx5e_st mlx5e_sts[] = { + { "Link Test", mlx5e_test_link_state }, + { "Speed Test", mlx5e_test_link_speed }, + { "Health Test", mlx5e_test_health_info }, +#ifdef CONFIG_INET + { "Loopback Test", mlx5e_test_loopback, mlx5e_cond_loopback }, +#endif +}; + +#define MLX5E_ST_NUM ARRAY_SIZE(mlx5e_sts) + +void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest, + u64 *buf) +{ + struct mlx5e_priv *priv = netdev_priv(ndev); + int i, count = 0; + + mutex_lock(&priv->state_lock); + netdev_info(ndev, "Self test begin..\n"); + + for (i = 0; i < MLX5E_ST_NUM; i++) { + struct mlx5e_st st = mlx5e_sts[i]; + + if (st.cond_func && st.cond_func(priv)) + continue; + netdev_info(ndev, "\t[%d] %s start..\n", i, st.name); + buf[count] = st.st_func(priv); + netdev_info(ndev, "\t[%d] %s end: result(%lld)\n", i, st.name, buf[count]); + count++; + } + + mutex_unlock(&priv->state_lock); + + for (i = 0; i < count; i++) { + if (buf[i]) { + etest->flags |= ETH_TEST_FL_FAILED; + break; + } + } + netdev_info(ndev, "Self test out: status flags(0x%x)\n", + etest->flags); +} + +int mlx5e_self_test_fill_strings(struct mlx5e_priv *priv, u8 *data) +{ + int i, count = 0; + + for (i = 0; i < MLX5E_ST_NUM; i++) { + struct mlx5e_st st = mlx5e_sts[i]; + + if (st.cond_func && st.cond_func(priv)) + continue; + if (data) + strcpy(data + count * ETH_GSTRING_LEN, st.name); + count++; + } + return count; +} + +int mlx5e_self_test_num(struct mlx5e_priv *priv) +{ + return mlx5e_self_test_fill_strings(priv, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c new file mode 100644 index 0000000000..4b96ad6571 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -0,0 +1,2513 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "lib/events.h" +#include "en.h" +#include "en_accel/ktls.h" +#include "en_accel/en_accel.h" +#include "en/ptp.h" +#include "en/port.h" + +#ifdef CONFIG_PAGE_POOL_STATS +#include +#endif + +static unsigned int stats_grps_num(struct mlx5e_priv *priv) +{ + return !priv->profile->stats_grps_num ? 0 : + priv->profile->stats_grps_num(priv); +} + +unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + unsigned int total = 0; + int i; + + for (i = 0; i < num_stats_grps; i++) + total += stats_grps[i]->get_num_stats(priv); + + return total; +} + +void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = num_stats_grps - 1; i >= 0; i--) + if (stats_grps[i]->update_stats && + stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS) + stats_grps[i]->update_stats(priv); +} + +void mlx5e_stats_update(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = num_stats_grps - 1; i >= 0; i--) + if (stats_grps[i]->update_stats) + stats_grps[i]->update_stats(priv); +} + +void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = 0; i < num_stats_grps; i++) + idx = stats_grps[i]->fill_stats(priv, data, idx); +} + +void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i, idx = 0; + + for (i = 0; i < num_stats_grps; i++) + idx = stats_grps[i]->fill_strings(priv, data, idx); +} + +/* Concrete NIC Stats */ + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) }, + +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_skip_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) }, +#endif + + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_skbs) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_match_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_nops) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_nops) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, +#ifdef CONFIG_MLX5_EN_ARFS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_add) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_request_in) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_request_out) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_expired) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, +#endif + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_refill) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cached) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) }, +#endif +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_skip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_ok) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_retry) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_skip) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_err) }, +#endif + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw) +{ + return NUM_SW_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i); + return idx; +} + +static void mlx5e_stats_grp_sw_update_stats_xdp_red(struct mlx5e_sw_stats *s, + struct mlx5e_xdpsq_stats *xdpsq_red_stats) +{ + s->tx_xdp_xmit += xdpsq_red_stats->xmit; + s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe; + s->tx_xdp_inlnw += xdpsq_red_stats->inlnw; + s->tx_xdp_nops += xdpsq_red_stats->nops; + s->tx_xdp_full += xdpsq_red_stats->full; + s->tx_xdp_err += xdpsq_red_stats->err; + s->tx_xdp_cqes += xdpsq_red_stats->cqes; +} + +static void mlx5e_stats_grp_sw_update_stats_xdpsq(struct mlx5e_sw_stats *s, + struct mlx5e_xdpsq_stats *xdpsq_stats) +{ + s->rx_xdp_tx_xmit += xdpsq_stats->xmit; + s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe; + s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw; + s->rx_xdp_tx_nops += xdpsq_stats->nops; + s->rx_xdp_tx_full += xdpsq_stats->full; + s->rx_xdp_tx_err += xdpsq_stats->err; + s->rx_xdp_tx_cqe += xdpsq_stats->cqes; +} + +static void mlx5e_stats_grp_sw_update_stats_xsksq(struct mlx5e_sw_stats *s, + struct mlx5e_xdpsq_stats *xsksq_stats) +{ + s->tx_xsk_xmit += xsksq_stats->xmit; + s->tx_xsk_mpwqe += xsksq_stats->mpwqe; + s->tx_xsk_inlnw += xsksq_stats->inlnw; + s->tx_xsk_full += xsksq_stats->full; + s->tx_xsk_err += xsksq_stats->err; + s->tx_xsk_cqes += xsksq_stats->cqes; +} + +static void mlx5e_stats_grp_sw_update_stats_xskrq(struct mlx5e_sw_stats *s, + struct mlx5e_rq_stats *xskrq_stats) +{ + s->rx_xsk_packets += xskrq_stats->packets; + s->rx_xsk_bytes += xskrq_stats->bytes; + s->rx_xsk_csum_complete += xskrq_stats->csum_complete; + s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary; + s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner; + s->rx_xsk_csum_none += xskrq_stats->csum_none; + s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark; + s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets; + s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop; + s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect; + s->rx_xsk_wqe_err += xskrq_stats->wqe_err; + s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes; + s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides; + s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop; + s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err; + s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks; + s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts; + s->rx_xsk_congst_umr += xskrq_stats->congst_umr; +} + +static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, + struct mlx5e_rq_stats *rq_stats) +{ + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->rx_lro_packets += rq_stats->lro_packets; + s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_gro_packets += rq_stats->gro_packets; + s->rx_gro_bytes += rq_stats->gro_bytes; + s->rx_gro_skbs += rq_stats->gro_skbs; + s->rx_gro_match_packets += rq_stats->gro_match_packets; + s->rx_gro_large_hds += rq_stats->gro_large_hds; + s->rx_ecn_mark += rq_stats->ecn_mark; + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; + s->rx_csum_none += rq_stats->csum_none; + s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_redirect += rq_stats->xdp_redirect; + s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes; + s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides; + s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; + s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; + s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_congst_umr += rq_stats->congst_umr; +#ifdef CONFIG_MLX5_EN_ARFS + s->rx_arfs_add += rq_stats->arfs_add; + s->rx_arfs_request_in += rq_stats->arfs_request_in; + s->rx_arfs_request_out += rq_stats->arfs_request_out; + s->rx_arfs_expired += rq_stats->arfs_expired; + s->rx_arfs_err += rq_stats->arfs_err; +#endif + s->rx_recover += rq_stats->recover; +#ifdef CONFIG_PAGE_POOL_STATS + s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast; + s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow; + s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty; + s->rx_pp_alloc_refill += rq_stats->pp_alloc_refill; + s->rx_pp_alloc_waive += rq_stats->pp_alloc_waive; + s->rx_pp_alloc_slow_high_order += rq_stats->pp_alloc_slow_high_order; + s->rx_pp_recycle_cached += rq_stats->pp_recycle_cached; + s->rx_pp_recycle_cache_full += rq_stats->pp_recycle_cache_full; + s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring; + s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full; + s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref; +#endif +#ifdef CONFIG_MLX5_EN_TLS + s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; + s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; + s->rx_tls_resync_req_pkt += rq_stats->tls_resync_req_pkt; + s->rx_tls_resync_req_start += rq_stats->tls_resync_req_start; + s->rx_tls_resync_req_end += rq_stats->tls_resync_req_end; + s->rx_tls_resync_req_skip += rq_stats->tls_resync_req_skip; + s->rx_tls_resync_res_ok += rq_stats->tls_resync_res_ok; + s->rx_tls_resync_res_retry += rq_stats->tls_resync_res_retry; + s->rx_tls_resync_res_skip += rq_stats->tls_resync_res_skip; + s->rx_tls_err += rq_stats->tls_err; +#endif +} + +static void mlx5e_stats_grp_sw_update_stats_ch_stats(struct mlx5e_sw_stats *s, + struct mlx5e_ch_stats *ch_stats) +{ + s->ch_events += ch_stats->events; + s->ch_poll += ch_stats->poll; + s->ch_arm += ch_stats->arm; + s->ch_aff_change += ch_stats->aff_change; + s->ch_force_irq += ch_stats->force_irq; + s->ch_eq_rearm += ch_stats->eq_rearm; +} + +static void mlx5e_stats_grp_sw_update_stats_sq(struct mlx5e_sw_stats *s, + struct mlx5e_sq_stats *sq_stats) +{ + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_tso_packets += sq_stats->tso_packets; + s->tx_tso_bytes += sq_stats->tso_bytes; + s->tx_tso_inner_packets += sq_stats->tso_inner_packets; + s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_added_vlan_packets += sq_stats->added_vlan_packets; + s->tx_nop += sq_stats->nop; + s->tx_mpwqe_blks += sq_stats->mpwqe_blks; + s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts; + s->tx_queue_stopped += sq_stats->stopped; + s->tx_queue_wake += sq_stats->wake; + s->tx_queue_dropped += sq_stats->dropped; + s->tx_cqe_err += sq_stats->cqe_err; + s->tx_recover += sq_stats->recover; + s->tx_xmit_more += sq_stats->xmit_more; + s->tx_csum_partial_inner += sq_stats->csum_partial_inner; + s->tx_csum_none += sq_stats->csum_none; + s->tx_csum_partial += sq_stats->csum_partial; +#ifdef CONFIG_MLX5_EN_TLS + s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; + s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; + s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; + s->tx_tls_dump_packets += sq_stats->tls_dump_packets; + s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data; + s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data; + s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; +#endif + s->tx_cqes += sq_stats->cqes; +} + +static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, + struct mlx5e_sw_stats *s) +{ + int i; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return; + + mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch); + + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + if (priv->rx_ptp_opened) { + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } +} + +static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, + struct mlx5e_sw_stats *s) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); + stats = READ_ONCE(priv->htb_qos_sq_stats); + + for (i = 0; i < max_qos_sqs; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i])); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } +} + +#ifdef CONFIG_PAGE_POOL_STATS +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ + struct mlx5e_rq_stats *rq_stats = c->rq.stats; + struct page_pool *pool = c->rq.page_pool; + struct page_pool_stats stats = { 0 }; + + if (!page_pool_get_stats(pool, &stats)) + return; + + rq_stats->pp_alloc_fast = stats.alloc_stats.fast; + rq_stats->pp_alloc_slow = stats.alloc_stats.slow; + rq_stats->pp_alloc_slow_high_order = stats.alloc_stats.slow_high_order; + rq_stats->pp_alloc_empty = stats.alloc_stats.empty; + rq_stats->pp_alloc_waive = stats.alloc_stats.waive; + rq_stats->pp_alloc_refill = stats.alloc_stats.refill; + + rq_stats->pp_recycle_cached = stats.recycle_stats.cached; + rq_stats->pp_recycle_cache_full = stats.recycle_stats.cache_full; + rq_stats->pp_recycle_ring = stats.recycle_stats.ring; + rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full; + rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt; +} +#else +static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) +{ +} +#endif + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + int i; + + memset(s, 0, sizeof(*s)); + + for (i = 0; i < priv->channels.num; i++) /* for active channels only */ + mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]); + + for (i = 0; i < priv->stats_nch; i++) { + struct mlx5e_channel_stats *channel_stats = + priv->channel_stats[i]; + + int j; + + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq); + mlx5e_stats_grp_sw_update_stats_xdpsq(s, &channel_stats->rq_xdpsq); + mlx5e_stats_grp_sw_update_stats_ch_stats(s, &channel_stats->ch); + /* xdp redirect */ + mlx5e_stats_grp_sw_update_stats_xdp_red(s, &channel_stats->xdpsq); + /* AF_XDP zero-copy */ + mlx5e_stats_grp_sw_update_stats_xskrq(s, &channel_stats->xskrq); + mlx5e_stats_grp_sw_update_stats_xsksq(s, &channel_stats->xsksq); + + for (j = 0; j < priv->max_opened_tc; j++) { + mlx5e_stats_grp_sw_update_stats_sq(s, &channel_stats->sq[j]); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + mlx5e_stats_grp_sw_update_stats_ptp(priv, s); + mlx5e_stats_grp_sw_update_stats_qos(priv, s); +} + +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +static const struct counter_desc drop_rq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) }, +}; + +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) +#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qcnt) +{ + int num_stats = 0; + + if (priv->q_counter) + num_stats += NUM_Q_COUNTERS; + + if (priv->drop_rq_q_counter) + num_stats += NUM_DROP_RQ_COUNTERS; + + return num_stats; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qcnt) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + q_stats_desc[i].format); + + for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + drop_rq_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i); + for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + drop_rq_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + int ret; + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + + if (priv->q_counter) { + MLX5_SET(query_q_counter_in, in, counter_set_id, + priv->q_counter); + ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out); + if (!ret) + qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, + out, out_of_buffer); + } + + if (priv->drop_rq_q_counter) { + MLX5_SET(query_q_counter_in, in, counter_set_id, + priv->drop_rq_q_counter); + ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out); + if (!ret) + qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, + out, out_of_buffer); + } +} + +#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c) +static const struct counter_desc vnic_env_stats_steer_desc[] = { + { "rx_steer_missed_packets", + VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) }, +}; + +static const struct counter_desc vnic_env_stats_dev_oob_desc[] = { + { "dev_internal_queue_oob", + VNIC_ENV_OFF(vport_env.internal_rq_out_of_buffer) }, +}; + +static const struct counter_desc vnic_env_stats_drop_desc[] = { + { "rx_oversize_pkts_buffer", + VNIC_ENV_OFF(vport_env.eth_wqe_too_small) }, +}; + +#define NUM_VNIC_ENV_STEER_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, nic_receive_steering_discard) ? \ + ARRAY_SIZE(vnic_env_stats_steer_desc) : 0) +#define NUM_VNIC_ENV_DEV_OOB_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \ + ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0) +#define NUM_VNIC_ENV_DROP_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, eth_wqe_too_small) ? \ + ARRAY_SIZE(vnic_env_stats_drop_desc) : 0) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env) +{ + return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) + + NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev) + + NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) +{ + int i; + + for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_steer_desc[i].format); + + for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_dev_oob_desc[i].format); + + for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vnic_env_stats_drop_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env) +{ + int i; + + for (i = 0; i < NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_steer_desc, i); + + for (i = 0; i < NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_dev_oob_desc, i); + + for (i = 0; i < NUM_VNIC_ENV_DROP_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR32_BE(priv->stats.vnic.query_vnic_env_out, + vnic_env_stats_drop_desc, i); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) +{ + u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out; + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!mlx5e_stats_grp_vnic_env_num_stats(priv)) + return; + + MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); + mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out); +} + +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +static const struct counter_desc vport_stats_desc[] = { + { "rx_vport_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_vport_unicast_bytes", + VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_vport_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_vport_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_vport_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_vport_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_vport_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_vport_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_vport_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_vport_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { "rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, +}; + +static const struct counter_desc vport_loopback_stats_desc[] = { + { "vport_loopback_packets", + VPORT_COUNTER_OFF(local_loopback.packets) }, + { "vport_loopback_bytes", + VPORT_COUNTER_OFF(local_loopback.octets) }, +}; + +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) +#define NUM_VPORT_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vport_counter_local_loopback) ? \ + ARRAY_SIZE(vport_loopback_stats_desc) : 0) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport) +{ + return NUM_VPORT_COUNTERS + + NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format); + + for (i = 0; i < NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_loopback_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + + for (i = 0; i < NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_loopback_stats_desc, i); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport) +{ + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {}; + struct mlx5_core_dev *mdev = priv->mdev; + + MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); + mlx5_cmd_exec_inout(mdev, query_vport_counter, in, out); +} + +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_802_3_stats_desc[] = { + { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, + { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, + { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(802_3) +{ + return NUM_PPORT_802_3_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(802_3) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(802_3) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); + return idx; +} + +#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \ + (MLX5_CAP_GEN(mdev, pcam_reg) ? MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1) + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define MLX5E_READ_CTR64_BE_F(ptr, set, c) \ + be64_to_cpu(*(__be64 *)((char *)ptr + \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.set.c##_high))) + +static int mlx5e_stats_get_ieee(struct mlx5_core_dev *mdev, + u32 *ppcnt_ieee_802_3) +{ + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return -EOPNOTSUPP; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + return mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3, + sz, MLX5_REG_PPCNT, 0, 0); +} + +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + pause_stats->tx_pause_frames = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_pause_mac_ctrl_frames_transmitted); + pause_stats->rx_pause_frames = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_pause_mac_ctrl_frames_received); +} + +void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, + struct ethtool_eth_phy_stats *phy_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + phy_stats->SymbolErrorDuringCarrier = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_symbol_error_during_carrier); +} + +void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv, + struct ethtool_eth_mac_stats *mac_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + +#define RD(name) \ + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, \ + eth_802_3_cntrs_grp_data_layout, \ + name) + + mac_stats->FramesTransmittedOK = RD(a_frames_transmitted_ok); + mac_stats->FramesReceivedOK = RD(a_frames_received_ok); + mac_stats->FrameCheckSequenceErrors = RD(a_frame_check_sequence_errors); + mac_stats->OctetsTransmittedOK = RD(a_octets_transmitted_ok); + mac_stats->OctetsReceivedOK = RD(a_octets_received_ok); + mac_stats->MulticastFramesXmittedOK = RD(a_multicast_frames_xmitted_ok); + mac_stats->BroadcastFramesXmittedOK = RD(a_broadcast_frames_xmitted_ok); + mac_stats->MulticastFramesReceivedOK = RD(a_multicast_frames_received_ok); + mac_stats->BroadcastFramesReceivedOK = RD(a_broadcast_frames_received_ok); + mac_stats->InRangeLengthErrors = RD(a_in_range_length_errors); + mac_stats->OutOfRangeLengthField = RD(a_out_of_range_length_field); + mac_stats->FrameTooLongErrors = RD(a_frame_too_long_errors); +#undef RD +} + +void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + ctrl_stats->MACControlFramesTransmitted = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_mac_control_frames_transmitted); + ctrl_stats->MACControlFramesReceived = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_mac_control_frames_received); + ctrl_stats->UnsupportedOpcodesReceived = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_unsupported_opcodes_received); +} + +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2863_stats_desc[] = { + { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, + { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, + { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, +}; + +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2863) +{ + return NUM_PPORT_2863_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2863) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2863) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2863) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2819_stats_desc[] = { + { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, + { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, + { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2819) +{ + return NUM_PPORT_2819_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2819) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2819) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +static const struct ethtool_rmon_hist_range mlx5e_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 8191 }, + { 8192, 10239 }, + {} +}; + +void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + u32 ppcnt_RFC_2819_counters[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, ppcnt_RFC_2819_counters, + sz, MLX5_REG_PPCNT, 0, 0)) + return; + +#define RD(name) \ + MLX5E_READ_CTR64_BE_F(ppcnt_RFC_2819_counters, \ + eth_2819_cntrs_grp_data_layout, \ + name) + + rmon->undersize_pkts = RD(ether_stats_undersize_pkts); + rmon->fragments = RD(ether_stats_fragments); + rmon->jabbers = RD(ether_stats_jabbers); + + rmon->hist[0] = RD(ether_stats_pkts64octets); + rmon->hist[1] = RD(ether_stats_pkts65to127octets); + rmon->hist[2] = RD(ether_stats_pkts128to255octets); + rmon->hist[3] = RD(ether_stats_pkts256to511octets); + rmon->hist[4] = RD(ether_stats_pkts512to1023octets); + rmon->hist[5] = RD(ether_stats_pkts1024to1518octets); + rmon->hist[6] = RD(ether_stats_pkts1519to2047octets); + rmon->hist[7] = RD(ether_stats_pkts2048to4095octets); + rmon->hist[8] = RD(ether_stats_pkts4096to8191octets); + rmon->hist[9] = RD(ether_stats_pkts8192to10239octets); +#undef RD + + *ranges = mlx5e_rmon_ranges; +} + +#define PPORT_PHY_STATISTICAL_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +static const struct counter_desc pport_phy_statistical_stats_desc[] = { + { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, +}; + +static const struct counter_desc +pport_phy_statistical_err_lanes_stats_desc[] = { + { "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) }, + { "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) }, + { "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) }, + { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) }, +}; + +#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_stats_desc) +#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int num_stats; + + /* "1" for link_down_events special counter */ + num_stats = 1; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ? + NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ? + NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0; + + return num_stats; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i; + + strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy"); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return idx; + + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_stats_desc[i].format); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_err_lanes_stats_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i; + + /* link_down_events_phy has special handling since it is not stored in __be64 format */ + data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return idx; + + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_err_lanes_stats_desc, + i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return; + + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +void mlx5e_get_link_ext_stats(struct net_device *dev, + struct ethtool_link_ext_stats *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(priv->mdev, in, sz, out, + MLX5_ST_SZ_BYTES(ppcnt_reg), MLX5_REG_PPCNT, 0, 0); + + stats->link_down_events = MLX5_GET(ppcnt_reg, out, + counter_set.phys_layer_cntrs.link_down_events); +} + +static int fec_num_lanes(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + int err; + + MLX5_SET(pmlp_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return 0; + + return MLX5_GET(pmlp_reg, out, width); +} + +static int fec_active_mode(struct mlx5_core_dev *mdev) +{ + unsigned long fec_active_long; + u32 fec_active; + + if (mlx5e_get_fec_mode(mdev, &fec_active, NULL)) + return MLX5E_FEC_NOFEC; + + fec_active_long = fec_active; + return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE); +} + +#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \ + fec_stats->corrected_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_corrected_blocks_lane##idx); \ + fec_stats->uncorrectable_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_uncorrectable_blocks_lane##idx); \ +}) + +static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats, + u32 *ppcnt, u8 lanes) +{ + if (lanes > 3) { /* 4 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(3); + MLX5E_STATS_SET_FEC_BLOCK(2); + } + if (lanes > 1) /* 2 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(1); + if (lanes > 0) /* 1 lane */ + MLX5E_STATS_SET_FEC_BLOCK(0); +} + +static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt) +{ + fec_stats->corrected_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_corrected_blocks); + fec_stats->uncorrectable_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_uncorrectable_blocks); +} + +static void fec_set_block_stats(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int mode = fec_active_mode(mdev); + + if (mode == MLX5E_FEC_NOFEC) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0)) + return; + + switch (mode) { + case MLX5E_FEC_RS_528_514: + case MLX5E_FEC_RS_544_514: + case MLX5E_FEC_LLRS_272_257_1: + fec_set_rs_stats(fec_stats, out); + return; + case MLX5E_FEC_FIRECODE: + fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev)); + } +} + +static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical, + sz, MLX5_REG_PPCNT, 0, 0)) + return; + + fec_stats->corrected_bits.total = + MLX5E_READ_CTR64_BE_F(ppcnt_phy_statistical, + phys_layer_statistical_cntrs, + phy_corrected_bits); +} + +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) + return; + + fec_set_corrected_bits_total(priv, fec_stats); + fec_set_block_stats(priv, fec_stats); +} + +#define PPORT_ETH_EXT_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_eth_ext_stats_desc[] = { + { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, +}; + +#define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(eth_ext) +{ + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + return NUM_PPORT_ETH_EXT_COUNTERS; + + return 0; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(eth_ext) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_eth_ext_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(eth_ext) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(eth_ext) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + + if (!MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + out = pstats->eth_ext_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); +} + +#define PCIE_PERF_OFF(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) +static const struct counter_desc pcie_perf_stats_desc[] = { + { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, + { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, +}; + +#define PCIE_PERF_OFF64(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pcie_perf_stats_desc64[] = { + { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, +}; + +static const struct counter_desc pcie_perf_stall_stats_desc[] = { + { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, + { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, + { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, + { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, +}; + +#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc) +#define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64) +#define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie) +{ + int num_stats = 0; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + num_stats += NUM_PCIE_PERF_COUNTERS; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + num_stats += NUM_PCIE_PERF_COUNTERS64; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + num_stats += NUM_PCIE_PERF_STALL_COUNTERS; + + return num_stats; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc64[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stall_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie) +{ + struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); + void *out; + + if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) + return; + + out = pcie_stats->pcie_perf_counters; + MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); +} + +#define PPORT_PER_TC_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_tc_prio_grp_data_layout.c##_high) + +static const struct counter_desc pport_per_tc_prio_stats_desc[] = { + { "rx_prio%d_buf_discard", PPORT_PER_TC_PRIO_OFF(no_buffer_discard_uc) }, +}; + +#define NUM_PPORT_PER_TC_PRIO_COUNTERS ARRAY_SIZE(pport_per_tc_prio_stats_desc) + +#define PPORT_PER_TC_CONGEST_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_tc_congest_prio_grp_data_layout.c##_high) + +static const struct counter_desc pport_per_tc_congest_prio_stats_desc[] = { + { "rx_prio%d_cong_discard", PPORT_PER_TC_CONGEST_PRIO_OFF(wred_discard) }, + { "rx_prio%d_marked", PPORT_PER_TC_CONGEST_PRIO_OFF(ecn_marked_tc) }, +}; + +#define NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS \ + ARRAY_SIZE(pport_per_tc_congest_prio_stats_desc) + +static int mlx5e_grp_per_tc_prio_get_num_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + return NUM_PPORT_PER_TC_PRIO_COUNTERS * NUM_PPORT_PRIO; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_port_buff_congest) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i, prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return idx; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_tc_prio_stats_desc[i].format, prio); + for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_tc_congest_prio_stats_desc[i].format, prio); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_port_buff_congest) +{ + struct mlx5e_pport_stats *pport = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + int i, prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return idx; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_TC_PRIO_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&pport->per_tc_prio_counters[prio], + pport_per_tc_prio_stats_desc, i); + for (i = 0; i < NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS ; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&pport->per_tc_congest_prio_counters[prio], + pport_per_tc_congest_prio_stats_desc, i); + } + + return idx; +} + +static void mlx5e_grp_per_tc_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + int prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return; + + MLX5_SET(ppcnt_reg, in, pnat, 2); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_tc_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } +} + +static int mlx5e_grp_per_tc_congest_prio_get_num_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + return NUM_PPORT_PER_TC_CONGEST_PRIO_COUNTERS * NUM_PPORT_PRIO; +} + +static void mlx5e_grp_per_tc_congest_prio_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *out; + int prio; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return; + + MLX5_SET(ppcnt_reg, in, pnat, 2); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_tc_congest_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_port_buff_congest) +{ + return mlx5e_grp_per_tc_prio_get_num_stats(priv) + + mlx5e_grp_per_tc_congest_prio_get_num_stats(priv); +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_port_buff_congest) +{ + mlx5e_grp_per_tc_prio_update_stats(priv); + mlx5e_grp_per_tc_congest_prio_update_stats(priv); +} + +#define PPORT_PER_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { + { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "rx_prio%d_discards", PPORT_PER_PRIO_OFF(rx_discards) }, + { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, +}; + +#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS ARRAY_SIZE(pport_per_prio_traffic_stats_desc) + +static int mlx5e_grp_per_prio_traffic_get_num_stats(void) +{ + return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO; +} + +static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); + } + + return idx; +} + +static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i); + } + + return idx; +} + +static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, +}; + +static const struct counter_desc pport_pfc_stall_stats_desc[] = { + { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, + { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) }, +}; + +#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_PFC_STALL_COUNTERS(priv) (ARRAY_SIZE(pport_pfc_stall_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \ + MLX5_CAP_DEBUG((priv)->mdev, stall_detect)) + +static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 pfc_en_tx; + u8 pfc_en_rx; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); + + return err ? 0 : pfc_en_tx | pfc_en_rx; +} + +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + return err ? false : rx_pause | tx_pause; +} + +static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv) +{ + return (mlx5e_query_global_pause_combined(priv) + + hweight8(mlx5e_query_pfc_combined(priv))) * + NUM_PPORT_PER_PRIO_PFC_COUNTERS + + NUM_PPORT_PFC_STALL_COUNTERS(priv); +} + +static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, "global"); + } + } + + for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_pfc_stall_stats_desc[i].format); + + return idx; +} + +static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, i); + } + } + + for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_pfc_stall_stats_desc, i); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_prio) +{ + return mlx5e_grp_per_prio_traffic_get_num_stats() + + mlx5e_grp_per_prio_pfc_get_num_stats(priv); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_prio) +{ + idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_prio) +{ + idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx); + idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_prio) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int prio; + void *out; + + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, + MLX5_REG_PPCNT, 0, 0); + } +} + +static const struct counter_desc mlx5e_pme_status_desc[] = { + { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED }, +}; + +static const struct counter_desc mlx5e_pme_error_desc[] = { + { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK }, + { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE }, + { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE }, +}; + +#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) +#define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pme) +{ + return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pme) +{ + int i; + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme) +{ + struct mlx5_pme_stats pme_stats; + int i; + + mlx5_get_pme_stats(priv->mdev, &pme_stats); + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters, + mlx5e_pme_status_desc, i); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters, + mlx5e_pme_error_desc, i); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; } + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls) +{ + return mlx5e_ktls_get_count(priv); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls) +{ + return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls) +{ + return idx + mlx5e_ktls_get_stats(priv, data + idx); +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; } + +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_skbs) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_match_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_large_hds) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, +#ifdef CONFIG_MLX5_EN_ARFS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_add) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_request_in) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_request_out) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_expired) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, +#endif + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, +#ifdef CONFIG_PAGE_POOL_STATS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_refill) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_waive) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cached) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) }, +#endif +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_skip) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_ok) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_retry) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_skip) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_err) }, +#endif +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, +#endif + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + +static const struct counter_desc rq_xdpsq_stats_desc[] = { + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + +static const struct counter_desc xdpsq_stats_desc[] = { + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, nops) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + +static const struct counter_desc xskrq_stats_desc[] = { + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) }, +}; + +static const struct counter_desc xsksq_stats_desc[] = { + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + +static const struct counter_desc ch_stats_desc[] = { + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, +}; + +static const struct counter_desc ptp_sq_stats_desc[] = { + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_PTP_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + +static const struct counter_desc ptp_ch_stats_desc[] = { + { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, events) }, + { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, poll) }, + { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, arm) }, + { MLX5E_DECLARE_PTP_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, +}; + +static const struct counter_desc ptp_cq_stats_desc[] = { + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, cqe) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, err_cqe) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, + { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, late_cqe) }, +}; + +static const struct counter_desc ptp_rq_stats_desc[] = { + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, +}; + +static const struct counter_desc qos_sq_stats_desc[] = { + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, +#endif + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) +#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) +#define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc) +#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc) +#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) +#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) +#define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) +#define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) +#define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_PTP_RQ_STATS ARRAY_SIZE(ptp_rq_stats_desc) +#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb_max_qos_sqs); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + u16 max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); + int i, qid; + + for (qid = 0; qid < max_qos_sqs; qid++) + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + qos_sq_stats_desc[i].format, qid); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i, qid; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb_max_qos_sqs); + stats = READ_ONCE(priv->htb_qos_sq_stats); + + for (qid = 0; qid < max_qos_sqs; qid++) { + struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]); + + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) +{ + int num = NUM_PTP_CH_STATS; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return 0; + + if (priv->tx_ptp_opened) + num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc; + if (priv->rx_ptp_opened) + num += NUM_PTP_RQ_STATS; + + return num; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) +{ + int i, tc; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return idx; + + for (i = 0; i < NUM_PTP_CH_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + "%s", ptp_ch_stats_desc[i].format); + + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_sq_stats_desc[i].format, tc); + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_cq_stats_desc[i].format, tc); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_rq_stats_desc[i].format, MLX5E_PTP_CHANNEL_IX); + } + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp) +{ + int i, tc; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return idx; + + for (i = 0; i < NUM_PTP_CH_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch, + ptp_ch_stats_desc, i); + + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc], + ptp_sq_stats_desc, i); + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc], + ptp_cq_stats_desc, i); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq, + ptp_rq_stats_desc, i); + } + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ptp) { return; } + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels) +{ + int max_nch = priv->stats_nch; + + return (NUM_RQ_STATS * max_nch) + + (NUM_CH_STATS * max_nch) + + (NUM_SQ_STATS * max_nch * priv->max_opened_tc) + + (NUM_RQ_XDPSQ_STATS * max_nch) + + (NUM_XDPSQ_STATS * max_nch) + + (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) + + (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels) +{ + bool is_xsk = priv->xsk.ever_used; + int max_nch = priv->stats_nch; + int i, j, tc; + + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_CH_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ch_stats_desc[j].format, i); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_stats_desc[j].format, i); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xskrq_stats_desc[j].format, i); + for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_xdpsq_stats_desc[j].format, i); + } + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + sq_stats_desc[j].format, + i + tc * max_nch); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xsksq_stats_desc[j].format, i); + for (j = 0; j < NUM_XDPSQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xdpsq_stats_desc[j].format, i); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) +{ + bool is_xsk = priv->xsk.ever_used; + int max_nch = priv->stats_nch; + int i, j, tc; + + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_CH_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch, + ch_stats_desc, j); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_RQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq, + rq_stats_desc, j); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq, + xskrq_stats_desc, j); + for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq, + rq_xdpsq_stats_desc, j); + } + + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < max_nch; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc], + sq_stats_desc, j); + + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq, + xsksq_stats_desc, j); + for (j = 0; j < NUM_XDPSQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq, + xdpsq_stats_desc, j); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(channels) { return; } + +MLX5E_DEFINE_STATS_GRP(sw, 0); +MLX5E_DEFINE_STATS_GRP(qcnt, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(vnic_env, 0); +MLX5E_DEFINE_STATS_GRP(vport, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(802_3, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(2863, 0); +MLX5E_DEFINE_STATS_GRP(2819, 0); +MLX5E_DEFINE_STATS_GRP(phy, 0); +MLX5E_DEFINE_STATS_GRP(pcie, 0); +MLX5E_DEFINE_STATS_GRP(per_prio, 0); +MLX5E_DEFINE_STATS_GRP(pme, 0); +MLX5E_DEFINE_STATS_GRP(channels, 0); +MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); +MLX5E_DEFINE_STATS_GRP(eth_ext, 0); +static MLX5E_DEFINE_STATS_GRP(tls, 0); +MLX5E_DEFINE_STATS_GRP(ptp, 0); +static MLX5E_DEFINE_STATS_GRP(qos, 0); + +/* The stats groups order is opposite to the update_stats() order calls */ +mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(eth_ext), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), +#ifdef CONFIG_MLX5_EN_IPSEC + &MLX5E_STATS_GRP(ipsec_hw), + &MLX5E_STATS_GRP(ipsec_sw), +#endif + &MLX5E_STATS_GRP(tls), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), + &MLX5E_STATS_GRP(ptp), + &MLX5E_STATS_GRP(qos), +#ifdef CONFIG_MLX5_MACSEC + &MLX5E_STATS_GRP(macsec_hw), +#endif +}; + +unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_nic_stats_grps); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h new file mode 100644 index 0000000000..477c547dcc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -0,0 +1,523 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_EN_STATS_H__ +#define __MLX5_EN_STATS_H__ + +#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \ + (*(u64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ + (*(u32 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ + be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + +#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) +#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) + +#define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld) + +#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) + +struct counter_desc { + char format[ETH_GSTRING_LEN]; + size_t offset; /* Byte offset */ +}; + +enum { + MLX5E_NDO_UPDATE_STATS = BIT(0x1), +}; + +struct mlx5e_priv; +struct mlx5e_stats_grp { + u16 update_stats_mask; + int (*get_num_stats)(struct mlx5e_priv *priv); + int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); + int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); + void (*update_stats)(struct mlx5e_priv *priv); +}; + +typedef const struct mlx5e_stats_grp *const mlx5e_stats_grp_t; + +#define MLX5E_STATS_GRP_OP(grp, name) mlx5e_stats_grp_ ## grp ## _ ## name + +#define MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(grp) \ + int MLX5E_STATS_GRP_OP(grp, num_stats)(struct mlx5e_priv *priv) + +#define MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(grp) \ + void MLX5E_STATS_GRP_OP(grp, update_stats)(struct mlx5e_priv *priv) + +#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(grp) \ + int MLX5E_STATS_GRP_OP(grp, fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx) + +#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(grp) \ + int MLX5E_STATS_GRP_OP(grp, fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx) + +#define MLX5E_STATS_GRP(grp) mlx5e_stats_grp_ ## grp + +#define MLX5E_DECLARE_STATS_GRP(grp) \ + const struct mlx5e_stats_grp MLX5E_STATS_GRP(grp) + +#define MLX5E_DEFINE_STATS_GRP(grp, mask) \ +MLX5E_DECLARE_STATS_GRP(grp) = { \ + .get_num_stats = MLX5E_STATS_GRP_OP(grp, num_stats), \ + .fill_stats = MLX5E_STATS_GRP_OP(grp, fill_stats), \ + .fill_strings = MLX5E_STATS_GRP_OP(grp, fill_strings), \ + .update_stats = MLX5E_STATS_GRP_OP(grp, update_stats), \ + .update_stats_mask = mask, \ +} + +unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv); +void mlx5e_stats_update(struct mlx5e_priv *priv); +void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx); +void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data); +void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); + +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats); +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats); + +void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, + struct ethtool_eth_phy_stats *phy_stats); +void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv, + struct ethtool_eth_mac_stats *mac_stats); +void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, + struct ethtool_eth_ctrl_stats *ctrl_stats); +void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges); +void mlx5e_get_link_ext_stats(struct net_device *dev, + struct ethtool_link_ext_stats *stats); + +/* Concrete NIC Stats */ + +struct mlx5e_sw_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tx_tso_packets; + u64 tx_tso_bytes; + u64 tx_tso_inner_packets; + u64 tx_tso_inner_bytes; + u64 tx_added_vlan_packets; + u64 tx_nop; + u64 tx_mpwqe_blks; + u64 tx_mpwqe_pkts; + u64 rx_lro_packets; + u64 rx_lro_bytes; + u64 rx_gro_packets; + u64 rx_gro_bytes; + u64 rx_gro_skbs; + u64 rx_gro_match_packets; + u64 rx_gro_large_hds; + u64 rx_mcast_packets; + u64 rx_ecn_mark; + u64 rx_removed_vlan_packets; + u64 rx_csum_unnecessary; + u64 rx_csum_none; + u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; + u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; + u64 rx_xdp_redirect; + u64 rx_xdp_tx_xmit; + u64 rx_xdp_tx_mpwqe; + u64 rx_xdp_tx_inlnw; + u64 rx_xdp_tx_nops; + u64 rx_xdp_tx_full; + u64 rx_xdp_tx_err; + u64 rx_xdp_tx_cqe; + u64 tx_csum_none; + u64 tx_csum_partial; + u64 tx_csum_partial_inner; + u64 tx_queue_stopped; + u64 tx_queue_dropped; + u64 tx_xmit_more; + u64 tx_recover; + u64 tx_cqes; + u64 tx_queue_wake; + u64 tx_cqe_err; + u64 tx_xdp_xmit; + u64 tx_xdp_mpwqe; + u64 tx_xdp_inlnw; + u64 tx_xdp_nops; + u64 tx_xdp_full; + u64 tx_xdp_err; + u64 tx_xdp_cqes; + u64 rx_wqe_err; + u64 rx_mpwqe_filler_cqes; + u64 rx_mpwqe_filler_strides; + u64 rx_oversize_pkts_sw_drop; + u64 rx_buff_alloc_err; + u64 rx_cqe_compress_blks; + u64 rx_cqe_compress_pkts; + u64 rx_congst_umr; +#ifdef CONFIG_MLX5_EN_ARFS + u64 rx_arfs_add; + u64 rx_arfs_request_in; + u64 rx_arfs_request_out; + u64 rx_arfs_expired; + u64 rx_arfs_err; +#endif + u64 rx_recover; + u64 ch_events; + u64 ch_poll; + u64 ch_arm; + u64 ch_aff_change; + u64 ch_force_irq; + u64 ch_eq_rearm; +#ifdef CONFIG_PAGE_POOL_STATS + u64 rx_pp_alloc_fast; + u64 rx_pp_alloc_slow; + u64 rx_pp_alloc_slow_high_order; + u64 rx_pp_alloc_empty; + u64 rx_pp_alloc_refill; + u64 rx_pp_alloc_waive; + u64 rx_pp_recycle_cached; + u64 rx_pp_recycle_cache_full; + u64 rx_pp_recycle_ring; + u64 rx_pp_recycle_ring_full; + u64 rx_pp_recycle_released_ref; +#endif +#ifdef CONFIG_MLX5_EN_TLS + u64 tx_tls_encrypted_packets; + u64 tx_tls_encrypted_bytes; + u64 tx_tls_ooo; + u64 tx_tls_dump_packets; + u64 tx_tls_dump_bytes; + u64 tx_tls_resync_bytes; + u64 tx_tls_skip_no_sync_data; + u64 tx_tls_drop_no_sync_data; + u64 tx_tls_drop_bypass_req; + + u64 rx_tls_decrypted_packets; + u64 rx_tls_decrypted_bytes; + u64 rx_tls_resync_req_pkt; + u64 rx_tls_resync_req_start; + u64 rx_tls_resync_req_end; + u64 rx_tls_resync_req_skip; + u64 rx_tls_resync_res_ok; + u64 rx_tls_resync_res_retry; + u64 rx_tls_resync_res_skip; + u64 rx_tls_err; +#endif + + u64 rx_xsk_packets; + u64 rx_xsk_bytes; + u64 rx_xsk_csum_complete; + u64 rx_xsk_csum_unnecessary; + u64 rx_xsk_csum_unnecessary_inner; + u64 rx_xsk_csum_none; + u64 rx_xsk_ecn_mark; + u64 rx_xsk_removed_vlan_packets; + u64 rx_xsk_xdp_drop; + u64 rx_xsk_xdp_redirect; + u64 rx_xsk_wqe_err; + u64 rx_xsk_mpwqe_filler_cqes; + u64 rx_xsk_mpwqe_filler_strides; + u64 rx_xsk_oversize_pkts_sw_drop; + u64 rx_xsk_buff_alloc_err; + u64 rx_xsk_cqe_compress_blks; + u64 rx_xsk_cqe_compress_pkts; + u64 rx_xsk_congst_umr; + u64 tx_xsk_xmit; + u64 tx_xsk_mpwqe; + u64 tx_xsk_inlnw; + u64 tx_xsk_full; + u64 tx_xsk_err; + u64 tx_xsk_cqes; +}; + +struct mlx5e_qcounter_stats { + u32 rx_out_of_buffer; + u32 rx_if_down_packets; +}; + +#define VNIC_ENV_GET(vnic_env_stats, c) \ + MLX5_GET(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \ + vport_env.c) + +struct mlx5e_vnic_env_stats { + __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; +}; + +#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ + vstats->query_vport_out, c) + +struct mlx5e_vport_stats { + __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; +}; + +#define PPORT_802_3_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_PHY_STATISTICAL_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +#define PPORT_PER_PRIO_GET(pstats, prio, c) \ + MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define NUM_PPORT_PRIO 8 +#define PPORT_ETH_EXT_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_tc_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_tc_congest_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; +}; + +#define PCIE_PERF_GET(pcie_stats, c) \ + MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c) + +#define PCIE_PERF_GET64(pcie_stats, c) \ + MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) + +struct mlx5e_pcie_stats { + __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 bytes; + u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; + u64 csum_unnecessary; + u64 csum_unnecessary_inner; + u64 csum_none; + u64 lro_packets; + u64 lro_bytes; + u64 gro_packets; + u64 gro_bytes; + u64 gro_skbs; + u64 gro_match_packets; + u64 gro_large_hds; + u64 mcast_packets; + u64 ecn_mark; + u64 removed_vlan_packets; + u64 xdp_drop; + u64 xdp_redirect; + u64 wqe_err; + u64 mpwqe_filler_cqes; + u64 mpwqe_filler_strides; + u64 oversize_pkts_sw_drop; + u64 buff_alloc_err; + u64 cqe_compress_blks; + u64 cqe_compress_pkts; + u64 congst_umr; +#ifdef CONFIG_MLX5_EN_ARFS + u64 arfs_add; + u64 arfs_request_in; + u64 arfs_request_out; + u64 arfs_expired; + u64 arfs_err; +#endif + u64 recover; +#ifdef CONFIG_PAGE_POOL_STATS + u64 pp_alloc_fast; + u64 pp_alloc_slow; + u64 pp_alloc_slow_high_order; + u64 pp_alloc_empty; + u64 pp_alloc_refill; + u64 pp_alloc_waive; + u64 pp_recycle_cached; + u64 pp_recycle_cache_full; + u64 pp_recycle_ring; + u64 pp_recycle_ring_full; + u64 pp_recycle_released_ref; +#endif +#ifdef CONFIG_MLX5_EN_TLS + u64 tls_decrypted_packets; + u64 tls_decrypted_bytes; + u64 tls_resync_req_pkt; + u64 tls_resync_req_start; + u64 tls_resync_req_end; + u64 tls_resync_req_skip; + u64 tls_resync_res_ok; + u64 tls_resync_res_retry; + u64 tls_resync_res_skip; + u64 tls_err; +#endif +}; + +struct mlx5e_sq_stats { + /* commonly accessed in data path */ + u64 packets; + u64 bytes; + u64 xmit_more; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 csum_partial; + u64 csum_partial_inner; + u64 added_vlan_packets; + u64 nop; + u64 mpwqe_blks; + u64 mpwqe_pkts; +#ifdef CONFIG_MLX5_EN_TLS + u64 tls_encrypted_packets; + u64 tls_encrypted_bytes; + u64 tls_ooo; + u64 tls_dump_packets; + u64 tls_dump_bytes; + u64 tls_resync_bytes; + u64 tls_skip_no_sync_data; + u64 tls_drop_no_sync_data; + u64 tls_drop_bypass_req; +#endif + /* less likely accessed in data path */ + u64 csum_none; + u64 stopped; + u64 dropped; + u64 recover; + /* dirtied @completion */ + u64 cqes ____cacheline_aligned_in_smp; + u64 wake; + u64 cqe_err; +}; + +struct mlx5e_xdpsq_stats { + u64 xmit; + u64 mpwqe; + u64 inlnw; + u64 nops; + u64 full; + u64 err; + /* dirtied @completion */ + u64 cqes ____cacheline_aligned_in_smp; +}; + +struct mlx5e_ch_stats { + u64 events; + u64 poll; + u64 arm; + u64 aff_change; + u64 force_irq; + u64 eq_rearm; +}; + +struct mlx5e_ptp_cq_stats { + u64 cqe; + u64 err_cqe; + u64 abort; + u64 abort_abs_diff_ns; + u64 late_cqe; +}; + +struct mlx5e_rep_stats { + u64 vport_rx_packets; + u64 vport_tx_packets; + u64 vport_rx_bytes; + u64 vport_tx_bytes; + u64 rx_vport_rdma_unicast_packets; + u64 tx_vport_rdma_unicast_packets; + u64 rx_vport_rdma_unicast_bytes; + u64 tx_vport_rdma_unicast_bytes; + u64 rx_vport_rdma_multicast_packets; + u64 tx_vport_rdma_multicast_packets; + u64 rx_vport_rdma_multicast_bytes; + u64 tx_vport_rdma_multicast_bytes; +}; + +struct mlx5e_stats { + struct mlx5e_sw_stats sw; + struct mlx5e_qcounter_stats qcnt; + struct mlx5e_vnic_env_stats vnic; + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; + struct mlx5e_pcie_stats pcie; + struct mlx5e_rep_stats rep_stats; +}; + +static inline void mlx5e_stats_copy_rep_stats(struct rtnl_link_stats64 *vf_vport, + struct mlx5e_rep_stats *rep_stats) +{ + memset(vf_vport, 0, sizeof(*vf_vport)); + vf_vport->rx_packets = rep_stats->vport_rx_packets; + vf_vport->tx_packets = rep_stats->vport_tx_packets; + vf_vport->rx_bytes = rep_stats->vport_rx_bytes; + vf_vport->tx_bytes = rep_stats->vport_tx_bytes; +} + +extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; +unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv); + +extern MLX5E_DECLARE_STATS_GRP(sw); +extern MLX5E_DECLARE_STATS_GRP(qcnt); +extern MLX5E_DECLARE_STATS_GRP(vnic_env); +extern MLX5E_DECLARE_STATS_GRP(vport); +extern MLX5E_DECLARE_STATS_GRP(802_3); +extern MLX5E_DECLARE_STATS_GRP(2863); +extern MLX5E_DECLARE_STATS_GRP(2819); +extern MLX5E_DECLARE_STATS_GRP(phy); +extern MLX5E_DECLARE_STATS_GRP(eth_ext); +extern MLX5E_DECLARE_STATS_GRP(pcie); +extern MLX5E_DECLARE_STATS_GRP(per_prio); +extern MLX5E_DECLARE_STATS_GRP(pme); +extern MLX5E_DECLARE_STATS_GRP(channels); +extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest); +extern MLX5E_DECLARE_STATS_GRP(ipsec_hw); +extern MLX5E_DECLARE_STATS_GRP(ipsec_sw); +extern MLX5E_DECLARE_STATS_GRP(ptp); +extern MLX5E_DECLARE_STATS_GRP(macsec_hw); + +#endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c new file mode 100644 index 0000000000..dc9b157a44 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -0,0 +1,5760 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "devlink.h" +#include "en.h" +#include "en/tc/post_act.h" +#include "en/tc/act_stats.h" +#include "en_rep.h" +#include "en/rep/tc.h" +#include "en/rep/neigh.h" +#include "en_tc.h" +#include "eswitch.h" +#include "fs_core.h" +#include "en/port.h" +#include "en/tc_tun.h" +#include "en/mapping.h" +#include "en/tc_ct.h" +#include "en/mod_hdr.h" +#include "en/tc_tun_encap.h" +#include "en/tc/sample.h" +#include "en/tc/act/act.h" +#include "en/tc/post_meter.h" +#include "lib/devcom.h" +#include "lib/geneve.h" +#include "lib/fs_chains.h" +#include "diag/en_tc_tracepoint.h" +#include +#include "lag/lag.h" +#include "lag/mp.h" + +#define MLX5E_TC_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) + +struct mlx5e_tc_table { + /* Protects the dynamic assignment of the t parameter + * which is the nic tc root table. + */ + struct mutex t_lock; + struct mlx5e_priv *priv; + struct mlx5_flow_table *t; + struct mlx5_flow_table *miss_t; + struct mlx5_fs_chains *chains; + struct mlx5e_post_act *post_act; + + struct rhashtable ht; + + struct mod_hdr_tbl mod_hdr; + struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */ + DECLARE_HASHTABLE(hairpin_tbl, 8); + + struct notifier_block netdevice_nb; + struct netdev_net_notifier netdevice_nn; + + struct mlx5_tc_ct_priv *ct; + struct mapping_ctx *mapping; + struct dentry *dfs_root; + + /* tc action stats */ + struct mlx5e_tc_act_stats_handle *action_stats_handle; +}; + +struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { + [MAPPED_OBJ_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 0, + .mlen = 16, + }, + [VPORT_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 16, + .mlen = 16, + }, + [TUNNEL_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, + .moffset = 8, + .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS, + .soffset = MLX5_BYTE_OFF(fte_match_param, + misc_parameters_2.metadata_reg_c_1), + }, + [ZONE_TO_REG] = zone_to_reg_ct, + [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct, + [CTSTATE_TO_REG] = ctstate_to_reg_ct, + [MARK_TO_REG] = mark_to_reg_ct, + [LABELS_TO_REG] = labels_to_reg_ct, + [FTEID_TO_REG] = fteid_to_reg_ct, + /* For NIC rules we store the restore metadata directly + * into reg_b that is passed to SW since we don't + * jump between steering domains. + */ + [NIC_MAPPED_OBJ_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, + .moffset = 0, + .mlen = 16, + }, + [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, + [PACKET_COLOR_TO_REG] = packet_color_to_reg, +}; + +struct mlx5e_tc_jump_state { + u32 jump_count; + bool jump_target; + struct mlx5_flow_attr *jumping_attr; + + enum flow_action_id last_id; + u32 last_index; +}; + +struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) +{ + struct mlx5e_tc_table *tc; + + tc = kvzalloc(sizeof(*tc), GFP_KERNEL); + return tc ? tc : ERR_PTR(-ENOMEM); +} + +void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) +{ + kvfree(tc); +} + +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc) +{ + return tc->chains; +} + +/* To avoid false lock dependency warning set the tc_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. + */ +static struct lock_class_key tc_ht_lock_key; +static struct lock_class_key tc_ht_wq_key; + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); +static void free_flow_post_acts(struct mlx5e_tc_flow *flow); +static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +void +mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 val, + u32 mask) +{ + void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + u32 max_mask = GENMASK(match_len - 1, 0); + __be32 curr_mask_be, curr_val_be; + u32 curr_mask, curr_val; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + memcpy(&curr_mask_be, fmask, 4); + memcpy(&curr_val_be, fval, 4); + + curr_mask = be32_to_cpu(curr_mask_be); + curr_val = be32_to_cpu(curr_val_be); + + //move to correct offset + WARN_ON(mask > max_mask); + mask <<= moffset; + val <<= moffset; + max_mask <<= moffset; + + //zero val and mask + curr_mask &= ~max_mask; + curr_val &= ~max_mask; + + //add current to mask + curr_mask |= mask; + curr_val |= val; + + //back to be32 and write + curr_mask_be = cpu_to_be32(curr_mask); + curr_val_be = cpu_to_be32(curr_val); + + memcpy(fmask, &curr_mask_be, 4); + memcpy(fval, &curr_val_be, 4); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + +void +mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 *val, + u32 *mask) +{ + void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; + int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; + u32 max_mask = GENMASK(match_len - 1, 0); + __be32 curr_mask_be, curr_val_be; + u32 curr_mask, curr_val; + + fmask = headers_c + soffset; + fval = headers_v + soffset; + + memcpy(&curr_mask_be, fmask, 4); + memcpy(&curr_val_be, fval, 4); + + curr_mask = be32_to_cpu(curr_mask_be); + curr_val = be32_to_cpu(curr_val_be); + + *mask = (curr_mask >> moffset) & max_mask; + *val = (curr_val >> moffset) & max_mask; +} + +int +mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + int err; + + modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts); + if (IS_ERR(modact)) + return PTR_ERR(modact); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 32) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset); + MLX5_SET(set_action_in, modact, length, mlen); + MLX5_SET(set_action_in, modact, data, data); + err = mod_hdr_acts->num_actions; + mod_hdr_acts->num_actions++; + + return err; +} + +static struct mlx5e_tc_act_stats_handle * +get_act_stats_handle(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->action_stats_handle; + } + + return tc->action_stats_handle; +} + +struct mlx5e_tc_int_port_priv * +mlx5e_get_int_port_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->int_port_priv; + } + + return NULL; +} + +struct mlx5e_flow_meters * +mlx5e_get_flow_meters(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_priv *priv; + + if (is_mdev_switchdev_mode(dev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + priv = netdev_priv(uplink_rpriv->netdev); + if (!uplink_priv->flow_meters) + uplink_priv->flow_meters = + mlx5e_flow_meters_init(priv, + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); + if (!IS_ERR(uplink_priv->flow_meters)) + return uplink_priv->flow_meters; + } + + return NULL; +} + +static struct mlx5_tc_ct_priv * +get_ct_priv(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->ct_priv; + } + + return tc->ct; +} + +static struct mlx5e_tc_psample * +get_sample_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->tc_psample; + } + + return NULL; +} + +static struct mlx5e_post_act * +get_post_action(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->post_act; + } + + return tc->post_act; +} + +struct mlx5_flow_handle * +mlx5_tc_rule_insert(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (is_mdev_switchdev_mode(priv->mdev)) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); +} + +void +mlx5_tc_rule_delete(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (is_mdev_switchdev_mode(priv->mdev)) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + return; + } + + mlx5e_del_offloaded_nic_rule(priv, rule, attr); +} + +static bool +is_flow_meter_action(struct mlx5_flow_attr *attr) +{ + return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && + (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) || + attr->flags & MLX5_ATTR_FLAG_MTU); +} + +static int +mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_post_act *post_act = get_post_action(priv); + struct mlx5e_post_meter_priv *post_meter; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_flow_meter_handle *meter; + enum mlx5e_post_meter_type type; + + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + + meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); + if (IS_ERR(meter)) { + mlx5_core_err(priv->mdev, "Failed to get flow meter\n"); + return PTR_ERR(meter); + } + + ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters); + type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE; + post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, + type, + meter->act_counter, meter->drop_counter, + attr->branch_true, attr->branch_false); + if (IS_ERR(post_meter)) { + mlx5_core_err(priv->mdev, "Failed to init post meter\n"); + goto err_meter_init; + } + + attr->meter_attr.meter = meter; + attr->meter_attr.post_meter = post_meter; + attr->dest_ft = mlx5e_post_meter_get_ft(post_meter); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; + +err_meter_init: + mlx5e_tc_meter_put(meter); + return PTR_ERR(post_meter); +} + +static void +mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter); + mlx5e_tc_meter_put(attr->meter_attr.meter); +} + +struct mlx5_flow_handle * +mlx5e_tc_rule_offload(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + if (!is_mdev_switchdev_mode(priv->mdev)) + return mlx5e_add_offloaded_nic_rule(priv, spec, attr); + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) + return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr); + + if (is_flow_meter_action(attr)) { + err = mlx5e_tc_add_flow_meter(priv, attr); + if (err) + return ERR_PTR(err); + } + + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); +} + +void +mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!is_mdev_switchdev_mode(priv->mdev)) { + mlx5e_del_offloaded_nic_rule(priv, rule, attr); + return; + } + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) { + mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr); + return; + } + + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + + if (attr->meter_attr.meter) + mlx5e_tc_del_flow_meter(esw, attr); +} + +int +mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data); + + return ret < 0 ? ret : 0; +} + +void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + int act_id, u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + + modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 32) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset); + MLX5_SET(set_action_in, modact, length, mlen); + MLX5_SET(set_action_in, modact, data, data); +} + +struct mlx5e_hairpin { + struct mlx5_hairpin *pair; + + struct mlx5_core_dev *func_mdev; + struct mlx5e_priv *func_priv; + u32 tdn; + struct mlx5e_tir direct_tir; + + int num_channels; + u8 log_num_packets; + struct mlx5e_rqt indir_rqt; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5_ttc_table *ttc; +}; + +struct mlx5e_hairpin_entry { + /* a node of a hash table which keeps all the hairpin entries */ + struct hlist_node hairpin_hlist; + + /* protects flows list */ + spinlock_t flows_lock; + /* flows sharing the same hairpin */ + struct list_head flows; + /* hpe's that were not fully initialized when dead peer update event + * function traversed them. + */ + struct list_head dead_peer_wait_list; + + u16 peer_vhca_id; + u8 prio; + struct mlx5e_hairpin *hp; + refcount_t refcnt; + struct completion res_ready; +}; + +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) +{ + if (!flow || !refcount_inc_not_zero(&flow->refcnt)) + return ERR_PTR(-EINVAL); + return flow; +} + +void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) +{ + if (refcount_dec_and_test(&flow->refcnt)) { + mlx5e_tc_del_flow(priv, flow); + kfree_rcu(flow, rcu_head); + } +} + +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, ESWITCH); +} + +bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, FT); +} + +bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, OFFLOADED); +} + +int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? + MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; +} + +static struct mlx5_core_dev * +get_flow_counter_dev(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev; +} + +static struct mod_hdr_tbl * +get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ? + &esw->offloads.mod_hdr : + &tc->mod_hdr; +} + +int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_mod_hdr_handle *mh; + + mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow), + mlx5e_get_flow_namespace(flow), + &attr->parse_attr->mod_hdr_acts); + if (IS_ERR(mh)) + return PTR_ERR(mh); + + WARN_ON(attr->modify_hdr); + attr->modify_hdr = mlx5e_mod_hdr_get(mh); + attr->mh = mh; + + return 0; +} + +void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) +{ + /* flow wasn't fully initialized */ + if (!attr->mh) + return; + + mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow), + attr->mh); + attr->mh = NULL; +} + +static +struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) +{ + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_priv *priv; + + netdev = dev_get_by_index(net, ifindex); + if (!netdev) + return ERR_PTR(-ENODEV); + + priv = netdev_priv(netdev); + mdev = priv->mdev; + dev_put(netdev); + + /* Mirred tc action holds a refcount on the ifindex net_device (see + * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev + * after dev_put(netdev), while we're in the context of adding a tc flow. + * + * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then + * stored in a hairpin object, which exists until all flows, that refer to it, get + * removed. + * + * On the other hand, after a hairpin object has been created, the peer net_device may + * be removed/unbound while there are still some hairpin flows that are using it. This + * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to + * NETDEV_UNREGISTER event of the peer net_device. + */ + return mdev; +} + +static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) +{ + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn); + if (err) + goto out; + + mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]); + err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false); + if (err) + goto create_tir_err; + +out: + mlx5e_tir_builder_free(builder); + return err; + +create_tir_err: + mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); + + goto out; +} + +static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) +{ + mlx5e_tir_destroy(&hp->direct_tir); + mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); +} + +static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rss_params_indir *indir; + int err; + + indir = kvmalloc(sizeof(*indir), GFP_KERNEL); + if (!indir) + return -ENOMEM; + + mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels); + err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, + mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc, + indir); + + kvfree(indir); + return err; +} + +static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct mlx5e_rss_params_hash rss_hash; + enum mlx5_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + int err = 0; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res); + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_rss_params_traffic_type rss_tt; + + rss_tt = mlx5e_rss_get_default_tt_config(tt); + + mlx5e_tir_builder_build_rqt(builder, hp->tdn, + mlx5e_rqt_get_rqtn(&hp->indir_rqt), + false); + mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false); + + err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false); + if (err) { + mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + +out: + mlx5e_tir_builder_free(builder); + return err; + +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&hp->indir_tir[tt]); + + goto out; +} + +static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp) +{ + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&hp->indir_tir[tt]); +} + +static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, + struct ttc_params *ttc_params) +{ + struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; + + memset(ttc_params, 0, sizeof(*ttc_params)); + + ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_tir_get_tirn(&hp->direct_tir) : + mlx5e_tir_get_tirn(&hp->indir_tir[tt]); + } + + ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; + ft_attr->prio = MLX5E_TC_PRIO; +} + +static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) +{ + struct mlx5e_priv *priv = hp->func_priv; + struct ttc_params ttc_params; + struct mlx5_ttc_table *ttc; + int err; + + err = mlx5e_hairpin_create_indirect_rqt(hp); + if (err) + return err; + + err = mlx5e_hairpin_create_indirect_tirs(hp); + if (err) + goto err_create_indirect_tirs; + + mlx5e_hairpin_set_ttc_params(hp, &ttc_params); + hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(hp->ttc)) { + err = PTR_ERR(hp->ttc); + goto err_create_ttc_table; + } + + ttc = mlx5e_fs_get_ttc(priv->fs, false); + netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", + hp->num_channels, + mlx5_get_ttc_flow_table(ttc)->id); + + return 0; + +err_create_ttc_table: + mlx5e_hairpin_destroy_indirect_tirs(hp); +err_create_indirect_tirs: + mlx5e_rqt_destroy(&hp->indir_rqt); + + return err; +} + +static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) +{ + mlx5_destroy_ttc_table(hp->ttc); + mlx5e_hairpin_destroy_indirect_tirs(hp); + mlx5e_rqt_destroy(&hp->indir_rqt); +} + +static struct mlx5e_hairpin * +mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params, + int peer_ifindex) +{ + struct mlx5_core_dev *func_mdev, *peer_mdev; + struct mlx5e_hairpin *hp; + struct mlx5_hairpin *pair; + int err; + + hp = kzalloc(sizeof(*hp), GFP_KERNEL); + if (!hp) + return ERR_PTR(-ENOMEM); + + func_mdev = priv->mdev; + peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + err = PTR_ERR(peer_mdev); + goto create_pair_err; + } + + pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); + if (IS_ERR(pair)) { + err = PTR_ERR(pair); + goto create_pair_err; + } + hp->pair = pair; + hp->func_mdev = func_mdev; + hp->func_priv = priv; + hp->num_channels = params->num_channels; + hp->log_num_packets = params->log_num_packets; + + err = mlx5e_hairpin_create_transport(hp); + if (err) + goto create_transport_err; + + if (hp->num_channels > 1) { + err = mlx5e_hairpin_rss_init(hp); + if (err) + goto rss_init_err; + } + + return hp; + +rss_init_err: + mlx5e_hairpin_destroy_transport(hp); +create_transport_err: + mlx5_core_hairpin_destroy(hp->pair); +create_pair_err: + kfree(hp); + return ERR_PTR(err); +} + +static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp) +{ + if (hp->num_channels > 1) + mlx5e_hairpin_rss_cleanup(hp); + mlx5e_hairpin_destroy_transport(hp); + mlx5_core_hairpin_destroy(hp->pair); + kvfree(hp); +} + +static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio) +{ + return (peer_vhca_id << 16 | prio); +} + +static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, + u16 peer_vhca_id, u8 prio) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5e_hairpin_entry *hpe; + u32 hash_key = hash_hairpin_info(peer_vhca_id, prio); + + hash_for_each_possible(tc->hairpin_tbl, hpe, + hairpin_hlist, hash_key) { + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { + refcount_inc(&hpe->refcnt); + return hpe; + } + } + + return NULL; +} + +static void mlx5e_hairpin_put(struct mlx5e_priv *priv, + struct mlx5e_hairpin_entry *hpe) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + /* no more hairpin flows for us, release the hairpin pair */ + if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock)) + return; + hash_del(&hpe->hairpin_hlist); + mutex_unlock(&tc->hairpin_tbl_lock); + + if (!IS_ERR_OR_NULL(hpe->hp)) { + netdev_dbg(priv->netdev, "del hairpin: peer %s\n", + dev_name(hpe->hp->pair->peer_mdev->device)); + + mlx5e_hairpin_destroy(hpe->hp); + } + + WARN_ON(!list_empty(&hpe->flows)); + kfree(hpe); +} + +#define UNKNOWN_MATCH_PRIO 8 + +static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, u8 *match_prio, + struct netlink_ext_ack *extack) +{ + void *headers_c, *headers_v; + u8 prio_val, prio_mask = 0; + bool vlan_present; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) { + NL_SET_ERR_MSG_MOD(extack, + "only PCP trust state supported for hairpin"); + return -EOPNOTSUPP; + } +#endif + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + + vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag); + if (vlan_present) { + prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + } + + if (!vlan_present || !prio_mask) { + prio_val = UNKNOWN_MATCH_PRIO; + } else if (prio_mask != 0x7) { + NL_SET_ERR_MSG_MOD(extack, + "masked priority match not supported for hairpin"); + return -EOPNOTSUPP; + } + + *match_prio = prio_val; + return 0; +} + +static int debugfs_hairpin_num_active_get(void *data, u64 *val) +{ + struct mlx5e_tc_table *tc = data; + struct mlx5e_hairpin_entry *hpe; + u32 cnt = 0; + u32 bkt; + + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) + cnt++; + mutex_unlock(&tc->hairpin_tbl_lock); + + *val = cnt; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active, + debugfs_hairpin_num_active_get, NULL, "%llu\n"); + +static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv) + +{ + struct mlx5e_tc_table *tc = file->private; + struct mlx5e_hairpin_entry *hpe; + u32 bkt; + + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) + seq_printf(file, + "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n", + hpe->peer_vhca_id, hpe->prio, + refcount_read(&hpe->refcnt), hpe->hp->num_channels, + BIT(hpe->hp->log_num_packets)); + mutex_unlock(&tc->hairpin_tbl_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump); + +static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tc->dfs_root = debugfs_create_dir("tc", dfs_root); + + debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc, + &fops_hairpin_num_active); + debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc, + &debugfs_hairpin_table_dump_fops); +} + +static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct devlink *devlink = priv_to_devlink(priv->mdev); + int peer_ifindex = parse_attr->mirred_ifindex[0]; + union devlink_param_value val = {}; + struct mlx5_hairpin_params params; + struct mlx5_core_dev *peer_mdev; + struct mlx5e_hairpin_entry *hpe; + struct mlx5e_hairpin *hp; + u8 match_prio; + u16 peer_id; + int err; + + peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device"); + return PTR_ERR(peer_mdev); + } + + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { + NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); + return -EOPNOTSUPP; + } + + peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio, + extack); + if (err) + return err; + + mutex_lock(&tc->hairpin_tbl_lock); + hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); + if (hpe) { + mutex_unlock(&tc->hairpin_tbl_lock); + wait_for_completion(&hpe->res_ready); + + if (IS_ERR(hpe->hp)) { + err = -EREMOTEIO; + goto out_err; + } + goto attach_flow; + } + + hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); + if (!hpe) { + mutex_unlock(&tc->hairpin_tbl_lock); + return -ENOMEM; + } + + spin_lock_init(&hpe->flows_lock); + INIT_LIST_HEAD(&hpe->flows); + INIT_LIST_HEAD(&hpe->dead_peer_wait_list); + hpe->peer_vhca_id = peer_id; + hpe->prio = match_prio; + refcount_set(&hpe->refcnt, 1); + init_completion(&hpe->res_ready); + + hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); + mutex_unlock(&tc->hairpin_tbl_lock); + + err = devl_param_driverinit_value_get( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val); + if (err) { + err = -ENOMEM; + goto out_err; + } + + params.log_num_packets = ilog2(val.vu32); + params.log_data_size = + clamp_t(u32, + params.log_num_packets + + MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev), + MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz), + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); + + params.q_counter = priv->q_counter; + err = devl_param_driverinit_value_get( + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val); + if (err) { + err = -ENOMEM; + goto out_err; + } + + params.num_channels = val.vu32; + + hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); + hpe->hp = hp; + complete_all(&hpe->res_ready); + if (IS_ERR(hp)) { + err = PTR_ERR(hp); + goto out_err; + } + + netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", + mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0], + dev_name(hp->pair->peer_mdev->device), + hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets); + +attach_flow: + if (hpe->hp->num_channels > 1) { + flow_flag_set(flow, HAIRPIN_RSS); + flow->attr->nic_attr->hairpin_ft = + mlx5_get_ttc_flow_table(hpe->hp->ttc); + } else { + flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir); + } + + flow->hpe = hpe; + spin_lock(&hpe->flows_lock); + list_add(&flow->hairpin, &hpe->flows); + spin_unlock(&hpe->flows_lock); + + return 0; + +out_err: + mlx5e_hairpin_put(priv, hpe); + return err; +} + +static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + /* flow wasn't fully initialized */ + if (!flow->hpe) + return; + + spin_lock(&flow->hpe->flows_lock); + list_del(&flow->hairpin); + spin_unlock(&flow->hpe->flows_lock); + + mlx5e_hairpin_put(priv, flow->hpe); + flow->hpe = NULL; +} + +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_context *flow_context = &spec->flow_context; + struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs); + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr; + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_fs_chains *nic_chains; + struct mlx5_flow_act flow_act = { + .action = attr->action, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *ft; + int dest_ix = 0; + + nic_chains = mlx5e_nic_chains(tc); + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = nic_attr->flow_tag; + + if (attr->dest_ft) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = attr->dest_ft; + dest_ix++; + } else if (nic_attr->hairpin_ft) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = nic_attr->hairpin_ft; + dest_ix++; + } else if (nic_attr->hairpin_tirn) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[dest_ix].tir_num = nic_attr->hairpin_tirn; + dest_ix++; + } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (attr->dest_chain) { + dest[dest_ix].ft = mlx5_chains_get_table(nic_chains, + attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); + if (IS_ERR(dest[dest_ix].ft)) + return ERR_CAST(dest[dest_ix].ft); + } else { + dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan); + } + dest_ix++; + } + + if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[dest_ix].counter_id = mlx5_fc_id(attr->counter); + dest_ix++; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_hdr = attr->modify_hdr; + + mutex_lock(&tc->t_lock); + if (IS_ERR_OR_NULL(tc->t)) { + /* Create the root table here if doesn't exist yet */ + tc->t = + mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL); + + if (IS_ERR(tc->t)) { + mutex_unlock(&tc->t_lock); + netdev_err(priv->netdev, + "Failed to create tc offload table\n"); + rule = ERR_CAST(tc->t); + goto err_ft_get; + } + } + mutex_unlock(&tc->t_lock); + + if (attr->chain || attr->prio) + ft = mlx5_chains_get_table(nic_chains, + attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); + else + ft = attr->ft; + + if (IS_ERR(ft)) { + rule = ERR_CAST(ft); + goto err_ft_get; + } + + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + rule = mlx5_add_flow_rules(ft, spec, + &flow_act, dest, dest_ix); + if (IS_ERR(rule)) + goto err_rule; + + return rule; + +err_rule: + if (attr->chain || attr->prio) + mlx5_chains_put_table(nic_chains, + attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); +err_ft_get: + if (attr->dest_chain) + mlx5_chains_put_table(nic_chains, + attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); + + return ERR_CAST(rule); +} + +static int +alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev, + struct mlx5_flow_attr *attr) + +{ + struct mlx5_fc *counter; + + counter = mlx5_fc_create(counter_dev, true); + if (IS_ERR(counter)) + return PTR_ERR(counter); + + attr->counter = counter; + return 0; +} + +static int +mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_core_dev *dev = priv->mdev; + int err; + + parse_attr = attr->parse_attr; + + if (flow_flag_test(flow, HAIRPIN)) { + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); + if (err) + return err; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = alloc_flow_attr_counter(dev, attr); + if (err) + return err; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_attach_mod_hdr(priv, flow, attr); + if (err) + return err; + } + + flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr); + return PTR_ERR_OR_ZERO(flow->rule[0]); +} + +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_fs_chains *nic_chains; + + nic_chains = mlx5e_nic_chains(tc); + mlx5_del_flow_rules(rule); + + if (attr->chain || attr->prio) + mlx5_chains_put_table(nic_chains, attr->chain, attr->prio, + MLX5E_TC_FT_LEVEL); + + if (attr->dest_chain) + mlx5_chains_put_table(nic_chains, attr->dest_chain, 1, + MLX5E_TC_FT_LEVEL); +} + +static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_flow_attr *attr = flow->attr; + + flow_flag_clear(flow, OFFLOADED); + + if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr); + + /* Remove root table if no rules are left to avoid + * extra steering hops. + */ + mutex_lock(&tc->t_lock); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && + !IS_ERR_OR_NULL(tc->t)) { + mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL); + tc->t = NULL; + } + mutex_unlock(&tc->t_lock); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); + mlx5e_tc_detach_mod_hdr(priv, flow, attr); + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(priv->mdev, attr->counter); + + if (flow_flag_test(flow, HAIRPIN)) + mlx5e_hairpin_flow_del(priv, flow); + + free_flow_post_acts(flow); + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); + + kvfree(attr->parse_attr); + kfree(flow->attr); +} + +struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_handle *rule; + + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + rule = mlx5e_tc_rule_offload(flow->priv, spec, attr); + + if (IS_ERR(rule)) + return rule; + + if (attr->esw_attr->split_count) { + flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); + if (IS_ERR(flow->rule[1])) + goto err_rule1; + } + + return rule; + +err_rule1: + mlx5e_tc_rule_unoffload(flow->priv, rule, attr); + return flow->rule[1]; +} + +void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) +{ + flow_flag_clear(flow, OFFLOADED); + + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + + if (attr->esw_attr->split_count) + mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + + mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr); +} + +struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec) +{ + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5e_mod_hdr_handle *mh = NULL; + struct mlx5_flow_attr *slow_attr; + struct mlx5_flow_handle *rule; + bool fwd_and_modify_cap; + u32 chain_mapping = 0; + int err; + + slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!slow_attr) + return ERR_PTR(-ENOMEM); + + memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->esw_attr->split_count = 0; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + + fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table); + if (!fwd_and_modify_cap) + goto skip_restore; + + err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping); + if (err) + goto err_get_chain; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + MAPPED_OBJ_TO_REG, chain_mapping); + if (err) + goto err_reg_set; + + mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow), + MLX5_FLOW_NAMESPACE_FDB, &mod_acts); + if (IS_ERR(mh)) { + err = PTR_ERR(mh); + goto err_attach; + } + + slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh); + +skip_restore: + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_offload; + } + + flow->attr->slow_mh = mh; + flow->chain_mapping = chain_mapping; + flow_flag_set(flow, SLOW); + + mlx5e_mod_hdr_dealloc(&mod_acts); + kfree(slow_attr); + + return rule; + +err_offload: + if (fwd_and_modify_cap) + mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh); +err_attach: +err_reg_set: + if (fwd_and_modify_cap) + mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping); +err_get_chain: + mlx5e_mod_hdr_dealloc(&mod_acts); + kfree(slow_attr); + return ERR_PTR(err); +} + +void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow) +{ + struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh; + struct mlx5_flow_attr *slow_attr; + + slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!slow_attr) { + mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n"); + return; + } + + memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->esw_attr->split_count = 0; + slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; + if (slow_mh) { + slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh); + } + mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); + if (slow_mh) { + mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh); + mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping); + flow->chain_mapping = 0; + flow->attr->slow_mh = NULL; + } + flow_flag_clear(flow, SLOW); + kfree(slow_attr); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ +static void unready_flow_add(struct mlx5e_tc_flow *flow, + struct list_head *unready_flows) +{ + flow_flag_set(flow, NOT_READY); + list_add_tail(&flow->unready, unready_flows); +} + +/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ +static void unready_flow_del(struct mlx5e_tc_flow *flow) +{ + list_del(&flow->unready); + flow_flag_clear(flow, NOT_READY); +} + +static void add_unready_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_add(flow, &uplink_priv->unready_flows); + mutex_unlock(&uplink_priv->unready_flows_lock); +} + +static void remove_unready_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + if (flow_flag_test(flow, NOT_READY)) + unready_flow_del(flow); + mutex_unlock(&uplink_priv->unready_flows_lock); +} + +bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev) +{ + struct mlx5_core_dev *out_mdev, *route_mdev; + struct mlx5e_priv *out_priv, *route_priv; + + out_priv = netdev_priv(out_dev); + out_mdev = out_priv->mdev; + route_priv = netdev_priv(route_dev); + route_mdev = route_priv->mdev; + + if (out_mdev->coredev_type != MLX5_COREDEV_PF) + return false; + + if (route_mdev->coredev_type != MLX5_COREDEV_VF && + route_mdev->coredev_type != MLX5_COREDEV_SF) + return false; + + return mlx5e_same_hw_devs(out_priv, route_priv); +} + +int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) +{ + struct mlx5e_priv *out_priv, *route_priv; + struct mlx5_core_dev *route_mdev; + struct mlx5_devcom_comp_dev *pos; + struct mlx5_eswitch *esw; + u16 vhca_id; + int err; + + out_priv = netdev_priv(out_dev); + esw = out_priv->mdev->priv.eswitch; + route_priv = netdev_priv(route_dev); + route_mdev = route_priv->mdev; + + vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (!err) + return err; + + if (!mlx5_lag_is_active(out_priv->mdev)) + return err; + + rcu_read_lock(); + err = -ENODEV; + mlx5_devcom_for_each_peer_entry_rcu(esw->devcom, esw, pos) { + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (!err) + break; + } + rcu_read_unlock(); + + return err; +} + +static int +verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) +{ + if (!(actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action"); + return -EOPNOTSUPP; + } + + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return -EOPNOTSUPP; + } + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool +has_encap_dests(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int out_index; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + return true; + + return false; +} + +static int +post_process_attr(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack) +{ + bool vf_tun; + int err = 0; + + err = verify_attr_actions(attr->action, extack); + if (err) + goto err_out; + + if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) { + err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); + if (err) + goto err_out; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr); + if (err) + goto err_out; + } + + if (attr->branch_true && + attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true); + if (err) + goto err_out; + } + + if (attr->branch_false && + attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false); + if (err) + goto err_out; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr); + if (err) + goto err_out; + } + +err_out: + return err; +} + +static int +mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + u32 max_prio, max_chain; + int err = 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + + /* We check chain range only for tc flows. + * For ft flows, we checked attr->chain was originally 0 and set it to + * FDB_FT_CHAIN which is outside tc range. + * See mlx5e_rep_setup_ft_cb(). + */ + max_chain = mlx5_chains_get_chain_range(esw_chains(esw)); + if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested chain is out of supported range"); + err = -EOPNOTSUPP; + goto err_out; + } + + max_prio = mlx5_chains_get_prio_range(esw_chains(esw)); + if (attr->prio > max_prio) { + NL_SET_ERR_MSG_MOD(extack, + "Requested priority is out of supported range"); + err = -EOPNOTSUPP; + goto err_out; + } + + if (flow_flag_test(flow, TUN_RX)) { + err = mlx5e_attach_decap_route(priv, flow); + if (err) + goto err_out; + + if (!attr->chain && esw_attr->int_port && + attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + /* If decap route device is internal port, change the + * source vport value in reg_c0 back to uplink just in + * case the rule performs goto chain > 0. If we have a miss + * on chain > 0 we want the metadata regs to hold the + * chain id so SW will resume handling of this packet + * from the proper chain. + */ + u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw, + esw_attr->in_rep->vport); + + err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, + metadata); + if (err) + goto err_out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + } + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) { + err = mlx5e_attach_decap(priv, flow, extack); + if (err) + goto err_out; + } + + if (netif_is_ovs_master(parse_attr->filter_dev)) { + struct mlx5e_tc_int_port *int_port; + + if (attr->chain) { + NL_SET_ERR_MSG_MOD(extack, + "Internal port rule is only supported on chain 0"); + err = -EOPNOTSUPP; + goto err_out; + } + + if (attr->dest_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Internal port rule offload doesn't support goto action"); + err = -EOPNOTSUPP; + goto err_out; + } + + int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), + parse_attr->filter_dev->ifindex, + flow_flag_test(flow, EGRESS) ? + MLX5E_TC_INT_PORT_EGRESS : + MLX5E_TC_INT_PORT_INGRESS); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto err_out; + } + + esw_attr->int_port = int_port; + } + + err = post_process_attr(flow, attr, extack); + if (err) + goto err_out; + + err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow); + if (err) + goto err_out; + + /* we get here if one of the following takes place: + * (1) there's no error + * (2) there's an encap action and we don't have valid neigh + */ + if (flow_flag_test(flow, SLOW)) + flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); + else + flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); + + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + goto err_out; + } + flow_flag_set(flow, OFFLOADED); + + return 0; + +err_out: + flow_flag_set(flow, FAILED); + return err; +} + +static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; + void *headers_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters_3); + u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, + headers_v, + geneve_tlv_option_0_data); + + return !!geneve_tlv_opt_0_data; +} + +static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) +{ + if (!attr) + return; + + mlx5_free_flow_attr_actions(flow, attr); + kvfree(attr->parse_attr); + kfree(attr); +} + +static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_flow_attr *attr = flow->attr; + + mlx5e_put_flow_tunnel_id(flow); + + remove_unready_flow(flow); + + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, SLOW)) + mlx5e_tc_unoffload_from_slow_path(esw, flow); + else + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); + } + complete_all(&flow->del_hw_done); + + if (mlx5_flow_has_geneve_opt(flow)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); + + if (flow->decap_route) + mlx5e_detach_decap_route(priv, flow); + + mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + mlx5e_detach_decap(priv, flow); + + mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow); + + free_flow_post_acts(flow); + mlx5_free_flow_attr_actions(flow, attr); + + kvfree(attr->esw_attr->rx_tun_attr); + kvfree(attr->parse_attr); + kfree(flow->attr); +} + +struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_attr *attr; + + attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list); + return attr->counter; +} + +/* Iterate over tmp_list of flows attached to flow_list head. */ +void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, flow_list, tmp_list) + mlx5e_flow_put(priv, flow); +} + +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, + int peer_index) +{ + struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *peer_flow; + struct mlx5e_tc_flow *tmp; + + if (!flow_flag_test(flow, ESWITCH) || + !flow_flag_test(flow, DUP)) + return; + + mutex_lock(&esw->offloads.peer_mutex); + list_del(&flow->peer[peer_index]); + mutex_unlock(&esw->offloads.peer_mutex); + + list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { + if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev)) + continue; + + list_del(&peer_flow->peer_flows); + if (refcount_dec_and_test(&peer_flow->refcnt)) { + mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow); + kfree(peer_flow); + } + } + + if (list_empty(&flow->peer_flows)) + flow_flag_clear(flow, DUP); +} + +static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (i == mlx5_get_dev_index(flow->priv->mdev)) + continue; + mlx5e_tc_del_fdb_peer_flow(flow, i); + } +} + +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (mlx5e_is_eswitch_flow(flow)) { + struct mlx5_devcom_comp_dev *devcom = flow->priv->mdev->priv.eswitch->devcom; + + if (!mlx5_devcom_for_each_peer_begin(devcom)) { + mlx5e_tc_del_fdb_flow(priv, flow); + return; + } + + mlx5e_tc_del_fdb_peers_flow(flow); + mlx5_devcom_for_each_peer_end(devcom); + mlx5e_tc_del_fdb_flow(priv, flow); + } else { + mlx5e_tc_del_nic_flow(priv, flow); + } +} + +static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_action *flow_action = &rule->action; + const struct flow_action_entry *act; + int i; + + if (chain) + return false; + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_GOTO: + return true; + case FLOW_ACTION_SAMPLE: + return true; + default: + continue; + } + } + + return false; +} + +static int +enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv, + struct flow_dissector_key_enc_opts *opts, + struct netlink_ext_ack *extack, + bool *dont_care) +{ + struct geneve_opt *opt; + int off = 0; + + *dont_care = true; + + while (opts->len > off) { + opt = (struct geneve_opt *)&opts->data[off]; + + if (!(*dont_care) || opt->opt_class || opt->type || + memchr_inv(opt->opt_data, 0, opt->length * 4)) { + *dont_care = false; + + if (opt->opt_class != htons(U16_MAX) || + opt->type != U8_MAX) { + NL_SET_ERR_MSG_MOD(extack, + "Partial match of tunnel options in chain > 0 isn't supported"); + netdev_warn(priv->netdev, + "Partial match of tunnel options in chain > 0 isn't supported"); + return -EOPNOTSUPP; + } + } + + off += sizeof(struct geneve_opt) + opt->length * 4; + } + + return 0; +} + +#define COPY_DISSECTOR(rule, diss_key, dst)\ +({ \ + struct flow_rule *__rule = (rule);\ + typeof(dst) __dst = dst;\ +\ + memcpy(__dst,\ + skb_flow_dissector_target(__rule->match.dissector,\ + diss_key,\ + __rule->match.key),\ + sizeof(*__dst));\ +}) + +static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct flow_cls_offload *f, + struct net_device *filter_dev) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; + struct flow_match_enc_opts enc_opts_match; + struct tunnel_match_enc_opts tun_enc_opts; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5e_rep_priv *uplink_rpriv; + struct tunnel_match_key tunnel_key; + bool enc_opts_is_dont_care = true; + u32 tun_id, enc_opts_id = 0; + struct mlx5_eswitch *esw; + u32 value, mask; + int err; + + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + memset(&tunnel_key, 0, sizeof(tunnel_key)); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, + &tunnel_key.enc_control); + if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + &tunnel_key.enc_ipv4); + else + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + &tunnel_key.enc_ipv6); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, + &tunnel_key.enc_tp); + COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, + &tunnel_key.enc_key_id); + tunnel_key.filter_ifindex = filter_dev->ifindex; + + err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id); + if (err) + return err; + + flow_rule_match_enc_opts(rule, &enc_opts_match); + err = enc_opts_is_dont_care_or_full_match(priv, + enc_opts_match.mask, + extack, + &enc_opts_is_dont_care); + if (err) + goto err_enc_opts; + + if (!enc_opts_is_dont_care) { + memset(&tun_enc_opts, 0, sizeof(tun_enc_opts)); + memcpy(&tun_enc_opts.key, enc_opts_match.key, + sizeof(*enc_opts_match.key)); + memcpy(&tun_enc_opts.mask, enc_opts_match.mask, + sizeof(*enc_opts_match.mask)); + + err = mapping_add(uplink_priv->tunnel_enc_opts_mapping, + &tun_enc_opts, &enc_opts_id); + if (err) + goto err_enc_opts; + } + + value = tun_id << ENC_OPTS_BITS | enc_opts_id; + mask = enc_opts_id ? TUNNEL_ID_MASK : + (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK); + + if (attr->chain) { + mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec, + TUNNEL_TO_REG, value, mask); + } else { + mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + err = mlx5e_tc_match_to_reg_set(priv->mdev, + mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB, + TUNNEL_TO_REG, value); + if (err) + goto err_set; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + flow->attr->tunnel_id = value; + return 0; + +err_set: + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +err_enc_opts: + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + return err; +} + +static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow) +{ + u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK; + u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (tun_id) + mapping_remove(uplink_priv->tunnel_mapping, tun_id); + if (enc_opts_id) + mapping_remove(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id); +} + +void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, + struct flow_match_basic *match, bool outer, + void *headers_c, void *headers_v) +{ + bool ip_version_cap; + + ip_version_cap = outer ? + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version) : + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version); + + if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) && + (match->key->n_proto == htons(ETH_P_IP) || + match->key->n_proto == htons(ETH_P_IPV6))) { + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, + match->key->n_proto == htons(ETH_P_IP) ? 4 : 6); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, + ntohs(match->mask->n_proto)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + ntohs(match->key->n_proto)); + } +} + +u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) +{ + void *headers_v; + u16 ethertype; + u8 ip_version; + + if (outer) + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + else + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); + + ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version); + /* Return ip_version converted from ethertype anyway */ + if (!ip_version) { + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP) + ip_version = 4; + else if (ethertype == ETH_P_IPV6) + ip_version = 6; + } + return ip_version; +} + +/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h. + * And changes inner ip_ecn depending on inner and outer ip_ecn as follows: + * +---------+----------------------------------------+ + * |Arriving | Arriving Outer Header | + * | Inner +---------+---------+---------+----------+ + * | Header | Not-ECT | ECT(0) | ECT(1) | CE | + * +---------+---------+---------+---------+----------+ + * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | | + * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* | + * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* | + * | CE | CE | CE | CE | CE | + * +---------+---------+---------+---------+----------+ + * + * Tc matches on inner after decapsulation on tunnel device, but hw offload matches + * the inner ip_ecn value before hardware decap action. + * + * Cells marked are changed from original inner packet ip_ecn value during decap, and + * so matching those values on inner ip_ecn before decap will fail. + * + * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn, + * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE, + * and such we can drop the inner ip_ecn=CE match. + */ + +static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + bool *match_inner_ecn) +{ + u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ip match; + + *match_inner_ecn = true; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + flow_rule_match_enc_ip(rule, &match); + outer_ecn_key = match.key->tos & INET_ECN_MASK; + outer_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + flow_rule_match_ip(rule, &match); + inner_ecn_key = match.key->tos & INET_ECN_MASK; + inner_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!outer_ecn_mask) { + if (!inner_ecn_mask) + return 0; + + NL_SET_ERR_MSG_MOD(extack, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!inner_ecn_mask) + return 0; + + /* Both inner and outer have full mask on ecn */ + + if (outer_ecn_key == INET_ECN_ECT_1) { + /* inner ecn might change by DECAP action */ + + NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported"); + netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported"); + return -EOPNOTSUPP; + } + + if (outer_ecn_key != INET_ECN_CE) + return 0; + + if (inner_ecn_key != INET_ECN_CE) { + /* Can't happen in software, as packet ecn will be changed to CE after decap */ + NL_SET_ERR_MSG_MOD(extack, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + netdev_warn(priv->netdev, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + return -EOPNOTSUPP; + } + + /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase, + * drop match on inner ecn + */ + *match_inner_ecn = false; + + return 0; +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev, + u8 *match_level, + bool *match_inner) +{ + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct netlink_ext_ack *extack = f->common.extack; + bool needs_mapping, sets_mapping; + int err; + + if (!mlx5e_is_eswitch_flow(flow)) { + NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported"); + return -EOPNOTSUPP; + } + + needs_mapping = !!flow->attr->chain; + sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f); + *match_inner = !needs_mapping; + + if ((needs_mapping || sets_mapping) && + !mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + NL_SET_ERR_MSG_MOD(extack, + "Chains on tunnel devices isn't supported without register loopback support"); + netdev_warn(priv->netdev, + "Chains on tunnel devices isn't supported without register loopback support"); + return -EOPNOTSUPP; + } + + if (!flow->attr->chain) { + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + match_level); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, + "Failed to parse tunnel attributes"); + return err; + } + + /* With mpls over udp we decapsulate using packet reformat + * object + */ + if (!netif_is_bareudp(filter_dev)) + flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + err = mlx5e_tc_set_attr_rx_tun(flow, spec); + if (err) + return err; + } else if (tunnel) { + struct mlx5_flow_spec *tmp_spec; + + tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL); + if (!tmp_spec) { + NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec"); + netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec"); + return -ENOMEM; + } + memcpy(tmp_spec, spec, sizeof(*tmp_spec)); + + err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); + if (err) { + kvfree(tmp_spec); + NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); + netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); + return err; + } + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + kvfree(tmp_spec); + if (err) + return err; + } + + if (!needs_mapping && !sets_mapping) + return 0; + + return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev); +} + +static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); +} + +static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); +} + +static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); +} + +static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) +{ + return MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); +} + +void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + get_match_inner_headers_value(spec) : + get_match_outer_headers_value(spec); +} + +void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec) +{ + return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? + get_match_inner_headers_criteria(spec) : + get_match_outer_headers_criteria(spec); +} + +static int mlx5e_flower_parse_meta(struct net_device *filter_dev, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct net_device *ingress_dev; + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + + if (!match.mask->ingress_ifindex) + return 0; + + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); + return -EOPNOTSUPP; + } + + ingress_dev = __dev_get_by_index(dev_net(filter_dev), + match.key->ingress_ifindex); + if (!ingress_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't find the ingress port to match on"); + return -ENOENT; + } + + if (ingress_dev != filter_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't match on the ingress filter port"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool skip_key_basic(struct net_device *filter_dev, + struct flow_cls_offload *f) +{ + /* When doing mpls over udp decap, the user needs to provide + * MPLS_UC as the protocol in order to be able to match on mpls + * label fields. However, the actual ethertype is IP so we want to + * avoid matching on this, otherwise we'll fail the match. + */ + if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0) + return true; + + return false; +} + +static int __parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev, + u8 *inner_match_level, u8 *outer_match_level) +{ + struct netlink_ext_ack *extack = f->common.extack; + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_3); + void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_3); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; + enum fs_flow_table_type fs_type; + bool match_inner_ecn = true; + u16 addr_type = 0; + u8 ip_proto = 0; + u8 *match_level; + int err; + + fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX; + match_level = outer_match_level; + + if (dissector->used_keys & + ~(BIT_ULL(FLOW_DISSECTOR_KEY_META) | + BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | + BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) | + BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) | + BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_TCP) | + BIT_ULL(FLOW_DISSECTOR_KEY_IP) | + BIT_ULL(FLOW_DISSECTOR_KEY_CT) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT_ULL(FLOW_DISSECTOR_KEY_ICMP) | + BIT_ULL(FLOW_DISSECTOR_KEY_MPLS))) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); + netdev_dbg(priv->netdev, "Unsupported key used: 0x%llx\n", + dissector->used_keys); + return -EOPNOTSUPP; + } + + if (mlx5e_get_tc_tun(filter_dev)) { + bool match_inner = false; + + err = parse_tunnel_attr(priv, flow, spec, f, filter_dev, + outer_match_level, &match_inner); + if (err) + return err; + + if (match_inner) { + /* header pointers should point to the inner headers + * if the packet was decapsulated already. + * outer headers are set by parse_tunnel_attr. + */ + match_level = inner_match_level; + headers_c = get_match_inner_headers_criteria(spec); + headers_v = get_match_inner_headers_value(spec); + } + + err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); + if (err) + return err; + } + + err = mlx5e_flower_parse_meta(filter_dev, f); + if (err) + return err; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) && + !skip_key_basic(filter_dev, f)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + mlx5e_tc_set_ethertype(priv->mdev, &match, + match_level == outer_match_level, + headers_c, headers_v); + + if (match.mask->n_proto) + *match_level = MLX5_MATCH_L2; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || + is_vlan_dev(filter_dev)) { + struct flow_dissector_key_vlan filter_dev_mask; + struct flow_dissector_key_vlan filter_dev_key; + struct flow_match_vlan match; + + if (is_vlan_dev(filter_dev)) { + match.key = &filter_dev_key; + match.key->vlan_id = vlan_dev_vlan_id(filter_dev); + match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev); + match.key->vlan_priority = 0; + match.mask = &filter_dev_mask; + memset(match.mask, 0xff, sizeof(*match.mask)); + match.mask->vlan_priority = 0; + } else { + flow_rule_match_vlan(rule, &match); + } + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + svlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + svlan_tag, 1); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + cvlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + cvlan_tag, 1); + } + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, + match.mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, + match.key->vlan_id); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, + match.mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, + match.key->vlan_priority); + + *match_level = MLX5_MATCH_L2; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && + match.mask->vlan_eth_type && + MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, + ft_field_support.outer_second_vid, + fs_type)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + spec->match_criteria_enable |= + MLX5_MATCH_MISC_PARAMETERS; + } + } + } else if (*match_level != MLX5_MATCH_NONE) { + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + *match_level = MLX5_MATCH_L2; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan match; + + flow_rule_match_cvlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid, + fs_type)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on CVLAN is not supported"); + return -EOPNOTSUPP; + } + + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_svlan_tag, 1); + MLX5_SET(fte_match_set_misc, misc_v, + outer_second_svlan_tag, 1); + } else { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + MLX5_SET(fte_match_set_misc, misc_v, + outer_second_cvlan_tag, 1); + } + + MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, + match.mask->vlan_id); + MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, + match.key->vlan_id); + MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, + match.mask->vlan_priority); + MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, + match.key->vlan_priority); + + *match_level = MLX5_MATCH_L2; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dmac_47_16), + match.mask->dst); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), + match.key->dst); + + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + smac_47_16), + match.mask->src); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + smac_47_16), + match.key->src); + + if (!is_zero_ether_addr(match.mask->src) || + !is_zero_ether_addr(match.mask->dst)) + *match_level = MLX5_MATCH_L2; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + + /* the HW doesn't support frag first/later */ + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) { + NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported"); + return -EOPNOTSUPP; + } + + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, + match.key->flags & FLOW_DIS_IS_FRAGMENT); + + /* the HW doesn't need L3 inline to match on frag=no */ + if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) + *match_level = MLX5_MATCH_L2; + /* *** L2 attributes parsing up to here *** */ + else + *match_level = MLX5_MATCH_L3; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + match.mask->ip_proto); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + match.key->ip_proto); + + if (match.mask->ip_proto) + *match_level = MLX5_MATCH_L3; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &match.key->src, sizeof(match.key->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &match.key->dst, sizeof(match.key->dst)); + + if (match.mask->src || match.mask->dst) + *match_level = MLX5_MATCH_L3; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.mask->src, sizeof(match.mask->src)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &match.key->src, sizeof(match.key->src)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.mask->dst, sizeof(match.mask->dst)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &match.key->dst, sizeof(match.key->dst)); + + if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || + ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) + *match_level = MLX5_MATCH_L3; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; + + flow_rule_match_ip(rule, &match); + if (match_inner_ecn) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + } + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); + + if (match.mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + return -EOPNOTSUPP; + } + + if (match.mask->tos || match.mask->ttl) + *match_level = MLX5_MATCH_L3; + } + + /* *** L3 attributes parsing up to here *** */ + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + switch (ip_proto) { + case IPPROTO_TCP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + tcp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + tcp_dport, ntohs(match.key->dst)); + break; + + case IPPROTO_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(match.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(match.key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(match.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(match.key->dst)); + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Only UDP and TCP transports are supported for L4 matching"); + netdev_err(priv->netdev, + "Only UDP and TCP transport are supported\n"); + return -EINVAL; + } + + if (match.mask->src || match.mask->dst) + *match_level = MLX5_MATCH_L4; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + + flow_rule_match_tcp(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, + ntohs(match.mask->flags)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, + ntohs(match.key->flags)); + + if (match.mask->flags) + *match_level = MLX5_MATCH_L4; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { + struct flow_match_icmp match; + + flow_rule_match_icmp(rule, &match); + switch (ip_proto) { + case IPPROTO_ICMP: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMP)) { + NL_SET_ERR_MSG_MOD(extack, + "Match on Flex protocols for ICMP is not supported"); + return -EOPNOTSUPP; + } + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, + match.key->code); + break; + case IPPROTO_ICMPV6: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMPV6)) { + NL_SET_ERR_MSG_MOD(extack, + "Match on Flex protocols for ICMPV6 is not supported"); + return -EOPNOTSUPP; + } + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, + match.key->code); + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Code and type matching only with ICMP and ICMPv6"); + netdev_err(priv->netdev, + "Code and type matching only with ICMP and ICMPv6\n"); + return -EINVAL; + } + if (match.mask->code || match.mask->type) { + *match_level = MLX5_MATCH_L4; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + } + } + /* Currently supported only for MPLS over UDP */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && + !netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on MPLS is supported only for MPLS over UDP"); + netdev_err(priv->netdev, + "Matching on MPLS is supported only for MPLS over UDP\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct net_device *filter_dev) +{ + u8 inner_match_level, outer_match_level, non_tunnel_match_level; + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + bool is_eswitch_flow; + int err; + + inner_match_level = MLX5_MATCH_NONE; + outer_match_level = MLX5_MATCH_NONE; + + err = __parse_cls_flower(priv, flow, spec, f, filter_dev, + &inner_match_level, &outer_match_level); + non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? + outer_match_level : inner_match_level; + + is_eswitch_flow = mlx5e_is_eswitch_flow(flow); + if (!err && is_eswitch_flow) { + rep = rpriv->rep; + if (rep->vport != MLX5_VPORT_UPLINK && + (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < non_tunnel_match_level)) { + NL_SET_ERR_MSG_MOD(extack, + "Flow is not offloaded due to min inline setting"); + netdev_warn(priv->netdev, + "Flow is not offloaded due to min inline setting, required %d actual %d\n", + non_tunnel_match_level, esw->offloads.inline_mode); + return -EOPNOTSUPP; + } + } + + flow->attr->inner_match_level = inner_match_level; + flow->attr->outer_match_level = outer_match_level; + + + return err; +} + +struct mlx5_fields { + u8 field; + u8 field_bsize; + u32 field_mask; + u32 offset; + u32 match_offset; +}; + +#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \ + {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \ + offsetof(struct pedit_headers, field) + (off), \ + MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)} + +/* masked values are the same and there are no rewrites that do not have a + * match. + */ +#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \ + type matchmaskx = *(type *)(matchmaskp); \ + type matchvalx = *(type *)(matchvalp); \ + type maskx = *(type *)(maskp); \ + type valx = *(type *)(valp); \ + \ + (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \ + matchmaskx)); \ +}) + +static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp, + void *matchmaskp, u8 bsize) +{ + bool same = false; + + switch (bsize) { + case 8: + same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp); + break; + case 16: + same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp); + break; + case 32: + same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp); + break; + } + + return same; +} + +static struct mlx5_fields fields[] = { + OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16), + OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0), + OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16), + OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0), + OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype), + OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid), + + OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp), + OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit), + OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4), + OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + + OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[0]), + OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[4]), + OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[8]), + OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0, + src_ipv4_src_ipv6.ipv6_layout.ipv6[12]), + OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]), + OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]), + OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]), + OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), + OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), + OFFLOAD(IP_DSCP, 16, 0x0fc0, ip6, 0, ip_dscp), + + OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), + OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), + /* in linux iphdr tcp_flags is 8 bits long */ + OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags), + + OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport), + OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), +}; + +static u32 mask_field_get(void *mask, struct mlx5_fields *f) +{ + switch (f->field_bsize) { + case 32: + return be32_to_cpu(*(__be32 *)mask) & f->field_mask; + case 16: + return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask; + default: + return *(u8 *)mask & (u8)f->field_mask; + } +} + +static void mask_field_clear(void *mask, struct mlx5_fields *f) +{ + switch (f->field_bsize) { + case 32: + *(__be32 *)mask &= ~cpu_to_be32(f->field_mask); + break; + case 16: + *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask); + break; + default: + *(u8 *)mask &= ~(u8)f->field_mask; + break; + } +} + +static int offload_pedit_fields(struct mlx5e_priv *priv, + int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action_flags, + struct netlink_ext_ack *extack) +{ + struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + struct pedit_headers_action *hdrs = parse_attr->hdrs; + void *headers_c, *headers_v, *action, *vals_p; + struct mlx5e_tc_mod_hdr_acts *mod_acts; + void *s_masks_p, *a_masks_p; + int i, first, last, next_z; + struct mlx5_fields *f; + unsigned long mask; + u32 s_mask, a_mask; + u8 cmd; + + mod_acts = &parse_attr->mod_hdr_acts; + headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec); + headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec); + + set_masks = &hdrs[0].masks; + add_masks = &hdrs[1].masks; + set_vals = &hdrs[0].vals; + add_vals = &hdrs[1].vals; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + bool skip; + + f = &fields[i]; + s_masks_p = (void *)set_masks + f->offset; + a_masks_p = (void *)add_masks + f->offset; + + s_mask = mask_field_get(s_masks_p, f); + a_mask = mask_field_get(a_masks_p, f); + + if (!s_mask && !a_mask) /* nothing to offload here */ + continue; + + if (s_mask && a_mask) { + NL_SET_ERR_MSG_MOD(extack, + "can't set and add to the same HW field"); + netdev_warn(priv->netdev, + "mlx5: can't set and add to the same HW field (%x)\n", + f->field); + return -EOPNOTSUPP; + } + + skip = false; + if (s_mask) { + void *match_mask = headers_c + f->match_offset; + void *match_val = headers_v + f->match_offset; + + cmd = MLX5_ACTION_TYPE_SET; + mask = s_mask; + vals_p = (void *)set_vals + f->offset; + /* don't rewrite if we have a match on the same value */ + if (cmp_val_mask(vals_p, s_masks_p, match_val, + match_mask, f->field_bsize)) + skip = true; + /* clear to denote we consumed this field */ + mask_field_clear(s_masks_p, f); + } else { + cmd = MLX5_ACTION_TYPE_ADD; + mask = a_mask; + vals_p = (void *)add_vals + f->offset; + /* add 0 is no change */ + if (!mask_field_get(vals_p, f)) + skip = true; + /* clear to denote we consumed this field */ + mask_field_clear(a_masks_p, f); + } + if (skip) + continue; + + first = find_first_bit(&mask, f->field_bsize); + next_z = find_next_zero_bit(&mask, f->field_bsize, first); + last = find_last_bit(&mask, f->field_bsize); + if (first < next_z && next_z < last) { + NL_SET_ERR_MSG_MOD(extack, + "rewrite of few sub-fields isn't supported"); + netdev_warn(priv->netdev, + "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", + mask); + return -EOPNOTSUPP; + } + + action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts); + if (IS_ERR(action)) { + NL_SET_ERR_MSG_MOD(extack, + "too many pedit actions, can't offload"); + mlx5_core_warn(priv->mdev, + "mlx5: parsed %d pedit actions, can't do more\n", + mod_acts->num_actions); + return PTR_ERR(action); + } + + MLX5_SET(set_action_in, action, action_type, cmd); + MLX5_SET(set_action_in, action, field, f->field); + + if (cmd == MLX5_ACTION_TYPE_SET) { + unsigned long field_mask = f->field_mask; + int start; + + /* if field is bit sized it can start not from first bit */ + start = find_first_bit(&field_mask, f->field_bsize); + + MLX5_SET(set_action_in, action, offset, first - start); + /* length is num of bits to be written, zero means length of 32 */ + MLX5_SET(set_action_in, action, length, (last - first + 1)); + } + + if (f->field_bsize == 32) + MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first); + else if (f->field_bsize == 16) + MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first); + else if (f->field_bsize == 8) + MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first); + + ++mod_acts->num_actions; + } + + return 0; +} + +static const struct pedit_headers zero_masks = {}; + +static int verify_offload_pedit_fields(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) +{ + struct pedit_headers *cmd_masks; + u8 cmd; + + for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { + cmd_masks = &parse_attr->hdrs[cmd].masks; + if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { + NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); + netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); + print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, + 16, 1, cmd_masks, sizeof(zero_masks), true); + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + u32 *action_flags, + struct netlink_ext_ack *extack) +{ + int err; + + err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack); + if (err) + goto out_dealloc_parsed_actions; + + err = verify_offload_pedit_fields(priv, parse_attr, extack); + if (err) + goto out_dealloc_parsed_actions; + + return 0; + +out_dealloc_parsed_actions: + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); + return err; +} + +struct ip_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +struct ipv6_hoplimit_word { + __be16 payload_len; + __u8 nexthdr; + __u8 hop_limit; +}; + +static bool +is_flow_action_modify_ip_header(struct flow_action *flow_action) +{ + const struct flow_action_entry *act; + u32 mask, offset; + u8 htype; + int i; + + /* For IPv4 & IPv6 header check 4 byte word, + * to determine that modified fields + * are NOT ttl & hop_limit only. + */ + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE && + act->id != FLOW_ACTION_ADD) + continue; + + htype = act->mangle.htype; + offset = act->mangle.offset; + mask = ~act->mangle.mask; + + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { + struct ip_ttl_word *ttl_word = + (struct ip_ttl_word *)&mask; + + if (offset != offsetof(struct iphdr, ttl) || + ttl_word->protocol || + ttl_word->check) + return true; + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + struct ipv6_hoplimit_word *hoplimit_word = + (struct ipv6_hoplimit_word *)&mask; + + if (offset != offsetof(struct ipv6hdr, payload_len) || + hoplimit_word->payload_len || + hoplimit_word->nexthdr) + return true; + } + } + + return false; +} + +static bool modify_header_match_supported(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_action *flow_action, + u32 actions, + struct netlink_ext_ack *extack) +{ + bool modify_ip_header; + void *headers_c; + void *headers_v; + u16 ethertype; + u8 ip_proto; + + headers_c = mlx5e_get_match_headers_criteria(actions, spec); + headers_v = mlx5e_get_match_headers_value(actions, spec); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + + /* for non-IP we only re-write MACs, so we're okay */ + if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 && + ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + goto out_ok; + + modify_ip_header = is_flow_action_modify_ip_header(flow_action); + ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); + if (modify_ip_header && ip_proto != IPPROTO_TCP && + ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of non TCP/UDP"); + netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n", + ip_proto); + return false; + } + +out_ok: + return true; +} + +static bool +actions_match_supported_fdb(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + + if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + NL_SET_ERR_MSG_MOD(extack, + "current firmware doesn't support split rule for port mirroring"); + netdev_warn_once(priv->netdev, + "current firmware doesn't support split rule for port mirroring\n"); + return false; + } + + return true; +} + +static bool +actions_match_supported(struct mlx5e_priv *priv, + struct flow_action *flow_action, + u32 actions, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, + extack)) + return false; + + if (mlx5e_is_eswitch_flow(flow) && + !actions_match_supported_fdb(priv, flow, extack)) + return false; + + return true; +} + +static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) +{ + return priv->mdev == peer_priv->mdev; +} + +bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) +{ + struct mlx5_core_dev *fmdev, *pmdev; + u64 fsystem_guid, psystem_guid; + + fmdev = priv->mdev; + pmdev = peer_priv->mdev; + + fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); + psystem_guid = mlx5_query_nic_system_image_guid(pmdev); + + return (fsystem_guid == psystem_guid); +} + +static int +actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct pedit_headers_action *hdrs = parse_attr->hdrs; + enum mlx5_flow_namespace_type ns_type; + int err; + + if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits && + !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) + return 0; + + ns_type = mlx5e_get_flow_namespace(flow); + + err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack); + if (err) + return err; + + if (parse_attr->mod_hdr_acts.num_actions > 0) + return 0; + + /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); + + if (ns_type != MLX5_FLOW_NAMESPACE_FDB) + return 0; + + if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))) + attr->esw_attr->split_count = 0; + + return 0; +} + +static struct mlx5_flow_attr* +mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + u32 attr_sz = ns_to_attr_sz(ns_type); + struct mlx5_flow_attr *attr2; + + attr2 = mlx5_alloc_flow_attr(ns_type); + parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); + if (!attr2 || !parse_attr) { + kvfree(parse_attr); + kfree(attr2); + return NULL; + } + + memcpy(attr2, attr, attr_sz); + INIT_LIST_HEAD(&attr2->list); + parse_attr->filter_dev = attr->parse_attr->filter_dev; + attr2->action = 0; + attr2->counter = NULL; + attr2->tc_act_cookies_count = 0; + attr2->flags = 0; + attr2->parse_attr = parse_attr; + attr2->dest_chain = 0; + attr2->dest_ft = NULL; + attr2->act_id_restore_rule = NULL; + memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr)); + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { + attr2->esw_attr->out_count = 0; + attr2->esw_attr->split_count = 0; + } + + attr2->branch_true = NULL; + attr2->branch_false = NULL; + attr2->jumping_attr = NULL; + return attr2; +} + +struct mlx5_flow_attr * +mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; + int i; + + list_for_each_entry(attr, &flow->attrs, list) { + esw_attr = attr->esw_attr; + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) + return attr; + } + } + + return NULL; +} + +void +mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr; + + list_for_each_entry(attr, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle); + } +} + +static void +free_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_attr *attr, *tmp; + + list_for_each_entry_safe(attr, tmp, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + mlx5_free_flow_attr_actions(flow, attr); + + list_del(&attr->list); + kvfree(attr->parse_attr); + kfree(attr); + } +} + +int +mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr; + int err = 0; + + list_for_each_entry(attr, &flow->attrs, list) { + if (list_is_last(&attr->list, &flow->attrs)) + break; + + err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle); + if (err) + break; + } + + return err; +} + +/* TC filter rule HW translation: + * + * +---------------------+ + * + ft prio (tc chain) + + * + original match + + * +---------------------+ + * | + * | if multi table action + * | + * v + * +---------------------+ + * + post act ft |<----. + * + match fte id | | split on multi table action + * + do actions |-----' + * +---------------------+ + * | + * | + * v + * Do rest of the actions after last multi table action. + */ +static int +alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) +{ + struct mlx5e_post_act *post_act = get_post_action(flow->priv); + struct mlx5_flow_attr *attr, *next_attr = NULL; + struct mlx5e_post_act_handle *handle; + int err; + + /* This is going in reverse order as needed. + * The first entry is the last attribute. + */ + list_for_each_entry(attr, &flow->attrs, list) { + if (!next_attr) { + /* Set counter action on last post act rule. */ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } + + if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) { + err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr); + if (err) + goto out_free; + } + + /* Don't add post_act rule for first attr (last in the list). + * It's being handled by the caller. + */ + if (list_is_last(&attr->list, &flow->attrs)) + break; + + err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); + if (err) + goto out_free; + + err = post_process_attr(flow, attr, extack); + if (err) + goto out_free; + + handle = mlx5e_tc_post_act_add(post_act, attr); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out_free; + } + + attr->post_act_handle = handle; + + if (attr->jumping_attr) { + err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr); + if (err) + goto out_free; + } + + next_attr = attr; + } + + if (flow_flag_test(flow, SLOW)) + goto out; + + err = mlx5e_tc_offload_flow_post_acts(flow); + if (err) + goto out_free; + +out: + return 0; + +out_free: + free_flow_post_acts(flow); + return err; +} + +static int +set_branch_dest_ft(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) +{ + struct mlx5e_post_act *post_act = get_post_action(priv); + + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); + + return 0; +} + +static int +alloc_branch_attr(struct mlx5e_tc_flow *flow, + struct mlx5e_tc_act_branch_ctrl *cond, + struct mlx5_flow_attr **cond_attr, + u32 *jump_count, + struct netlink_ext_ack *extack) +{ + struct mlx5_flow_attr *attr; + int err = 0; + + *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, + mlx5e_get_flow_namespace(flow)); + if (!(*cond_attr)) + return -ENOMEM; + + attr = *cond_attr; + + switch (cond->act_id) { + case FLOW_ACTION_DROP: + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + break; + case FLOW_ACTION_ACCEPT: + case FLOW_ACTION_PIPE: + err = set_branch_dest_ft(flow->priv, attr); + if (err) + goto out_err; + break; + case FLOW_ACTION_JUMP: + if (*jump_count) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps"); + err = -EOPNOTSUPP; + goto out_err; + } + *jump_count = cond->extval; + err = set_branch_dest_ft(flow->priv, attr); + if (err) + goto out_err; + break; + default: + err = -EOPNOTSUPP; + goto out_err; + } + + return err; +out_err: + kfree(*cond_attr); + *cond_attr = NULL; + return err; +} + +static void +dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, + struct mlx5_flow_attr *attr, struct mlx5e_priv *priv, + struct mlx5e_tc_jump_state *jump_state) +{ + if (!jump_state->jump_count) + return; + + /* Single tc action can instantiate multiple offload actions (e.g. pedit) + * Jump only over a tc action + */ + if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index) + return; + + jump_state->last_id = act->id; + jump_state->last_index = act->hw_index; + + /* nothing to do for intermediate actions */ + if (--jump_state->jump_count > 1) + return; + + if (jump_state->jump_count == 1) { /* last action in the jump action list */ + + /* create a new attribute after this action */ + jump_state->jump_target = true; + + if (tc_act->is_terminating_action) { /* the branch ends here */ + attr->flags |= MLX5_ATTR_FLAG_TERMINATING; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } else { /* the branch continues executing the rest of the actions */ + struct mlx5e_post_act *post_act; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + post_act = get_post_action(priv); + attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); + } + } else if (jump_state->jump_count == 0) { /* first attr after the jump action list */ + /* This is the post action for the jumping attribute (either red or green) + * Use the stored jumping_attr to set the post act id on the jumping attribute + */ + attr->jumping_attr = jump_state->jumping_attr; + } +} + +static int +parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, + struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, + struct mlx5e_tc_jump_state *jump_state, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_act_branch_ctrl cond_true, cond_false; + u32 jump_count = jump_state->jump_count; + int err; + + if (!tc_act->get_branch_ctrl) + return 0; + + tc_act->get_branch_ctrl(act, &cond_true, &cond_false); + + err = alloc_branch_attr(flow, &cond_true, + &attr->branch_true, &jump_count, extack); + if (err) + goto out_err; + + if (jump_count) + jump_state->jumping_attr = attr->branch_true; + + err = alloc_branch_attr(flow, &cond_false, + &attr->branch_false, &jump_count, extack); + if (err) + goto err_branch_false; + + if (jump_count && !jump_state->jumping_attr) + jump_state->jumping_attr = attr->branch_false; + + jump_state->jump_count = jump_count; + + /* branching action requires its own counter */ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_flag_set(flow, USE_ACT_STATS); + + return 0; + +err_branch_false: + free_branch_attr(flow, attr->branch_true); +out_err: + return err; +} + +static int +parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5e_tc_jump_state jump_state = {}; + struct mlx5_flow_attr *attr = flow->attr; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_priv *priv = flow->priv; + struct mlx5_flow_attr *prev_attr; + struct flow_action_entry *act; + struct mlx5e_tc_act *tc_act; + int err, i, i_split = 0; + bool is_missable; + + ns_type = mlx5e_get_flow_namespace(flow); + list_add(&attr->list, &flow->attrs); + + flow_action_for_each(i, act, flow_action) { + jump_state.jump_target = false; + is_missable = false; + prev_attr = attr; + + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act) { + NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); + err = -EOPNOTSUPP; + goto out_free_post_acts; + } + + if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) { + err = -EOPNOTSUPP; + goto out_free_post_acts; + } + + err = tc_act->parse_action(parse_state, act, priv, attr); + if (err) + goto out_free_post_acts; + + dec_jump_count(act, tc_act, attr, priv, &jump_state); + + err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack); + if (err) + goto out_free_post_acts; + + parse_state->actions |= attr->action; + + /* Split attr for multi table act if not the last act. */ + if (jump_state.jump_target || + (tc_act->is_multi_table_act && + tc_act->is_multi_table_act(priv, act, attr) && + i < flow_action->num_entries - 1)) { + is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false; + + err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, + ns_type); + if (err) + goto out_free_post_acts; + + attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type); + if (!attr) { + err = -ENOMEM; + goto out_free_post_acts; + } + + i_split = i + 1; + parse_state->if_count = 0; + list_add(&attr->list, &flow->attrs); + } + + if (is_missable) { + /* Add counter to prev, and assign act to new (next) attr */ + prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_flag_set(flow, USE_ACT_STATS); + + attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie; + } else if (!tc_act->stats_action) { + prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie; + } + } + + err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, ns_type); + if (err) + goto out_free_post_acts; + + err = alloc_flow_post_acts(flow, extack); + if (err) + goto out_free_post_acts; + + return 0; + +out_free_post_acts: + free_flow_post_acts(flow); + + return err; +} + +static int +flow_action_supported(struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); + return -EINVAL; + } + + if (!flow_action_hw_stats_check(flow_action, extack, + FLOW_ACTION_HW_STATS_DELAYED_BIT)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_act_parse_state *parse_state; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + int err; + + err = flow_action_supported(flow_action, extack); + if (err) + return err; + + attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + parse_attr = attr->parse_attr; + parse_state = &parse_attr->parse_state; + mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); + parse_state->ct_priv = get_ct_priv(priv); + + err = parse_tc_actions(parse_state, flow_action); + if (err) + return err; + + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); + if (err) + return err; + + err = verify_attr_actions(attr->action, extack); + if (err) + return err; + + if (!actions_match_supported(priv, flow_action, parse_state->actions, + parse_attr, flow, extack)) + return -EOPNOTSUPP; + + return 0; +} + +static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + + return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && + mlx5e_eswitch_vf_rep(priv->netdev) && + mlx5e_eswitch_vf_rep(peer_netdev) && + mlx5e_same_hw_devs(priv, peer_priv)); +} + +static bool same_hw_reps(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + + return mlx5e_eswitch_rep(priv->netdev) && + mlx5e_eswitch_rep(peer_netdev) && + mlx5e_same_hw_devs(priv, peer_priv); +} + +static bool is_lag_dev(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + return ((mlx5_lag_is_sriov(priv->mdev) || + mlx5_lag_is_multipath(priv->mdev)) && + same_hw_reps(priv, peer_netdev)); +} + +static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) +{ + return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev); +} + +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev) +{ + if (is_merged_eswitch_vfs(priv, out_dev)) + return true; + + if (is_multiport_eligible(priv, out_dev)) + return true; + + if (is_lag_dev(priv, out_dev)) + return true; + + return mlx5e_eswitch_rep(out_dev) && + same_port_devs(priv, netdev_priv(out_dev)); +} + +int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + int ifindex, + enum mlx5e_tc_int_port_type type, + u32 *action, + int out_index) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5e_tc_int_port_priv *int_port_priv; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_int_port *dest_int_port; + int err; + + parse_attr = attr->parse_attr; + int_port_priv = mlx5e_get_int_port_priv(priv); + + dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type); + if (IS_ERR(dest_int_port)) + return PTR_ERR(dest_int_port); + + err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, + mlx5e_tc_int_port_get_metadata(dest_int_port)); + if (err) { + mlx5e_tc_int_port_put(int_port_priv, dest_int_port); + return err; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + esw_attr->dest_int_port = dest_int_port; + esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; + esw_attr->split_count = out_index; + + /* Forward to root fdb for matching against the new source vport */ + attr->dest_chain = 0; + + return 0; +} + +static int +parse_tc_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_act_parse_state *parse_state; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *filter_dev; + int err; + + err = flow_action_supported(flow_action, extack); + if (err) + return err; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + filter_dev = parse_attr->filter_dev; + parse_state = &parse_attr->parse_state; + mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); + parse_state->ct_priv = get_ct_priv(priv); + + err = parse_tc_actions(parse_state, flow_action); + if (err) + return err; + + /* Forward to/from internal port can only have 1 dest */ + if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) && + esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, + "Rules with internal port can have only one destination"); + return -EOPNOTSUPP; + } + + /* Forward from tunnel/internal port to internal port is not supported */ + if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) && + esw_attr->dest_int_port) { + NL_SET_ERR_MSG_MOD(extack, + "Forwarding from tunnel/internal port to internal port is not supported"); + return -EOPNOTSUPP; + } + + err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack); + if (err) + return err; + + if (!actions_match_supported(priv, flow_action, parse_state->actions, + parse_attr, flow, extack)) + return -EOPNOTSUPP; + + return 0; +} + +static void get_flags(int flags, unsigned long *flow_flags) +{ + unsigned long __flow_flags = 0; + + if (flags & MLX5_TC_FLAG(INGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); + if (flags & MLX5_TC_FLAG(EGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS); + + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); + if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); + + *flow_flags = __flow_flags; +} + +static const struct rhashtable_params tc_ht_params = { + .head_offset = offsetof(struct mlx5e_tc_flow, node), + .key_offset = offsetof(struct mlx5e_tc_flow, cookie), + .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie), + .automatic_shrinking = true, +}; + +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, + unsigned long flags) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5e_rep_priv *rpriv; + + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { + rpriv = priv->ppriv; + return &rpriv->tc_ht; + } else /* NIC offload */ + return &tc->ht; +} + +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + struct mlx5_flow_attr *attr = flow->attr; + bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK && + flow_flag_test(flow, INGRESS); + bool act_is_encap = !!(attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); + bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.eswitch->devcom); + + if (!esw_paired) + return false; + + if ((mlx5_lag_is_sriov(esw_attr->in_mdev) || + mlx5_lag_is_multipath(esw_attr->in_mdev)) && + (is_rep_ingress || act_is_encap)) + return true; + + if (mlx5_lag_is_mpesw(esw_attr->in_mdev)) + return true; + + return false; +} + +struct mlx5_flow_attr * +mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) +{ + u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ? + sizeof(struct mlx5_esw_flow_attr) : + sizeof(struct mlx5_nic_flow_attr); + struct mlx5_flow_attr *attr; + + attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL); + if (!attr) + return attr; + + INIT_LIST_HEAD(&attr->list); + return attr; +} + +static void +mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) +{ + struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); + struct mlx5_esw_flow_attr *esw_attr; + + if (!attr) + return; + + if (attr->post_act_handle) + mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle); + + mlx5e_tc_tun_encap_dests_unset(flow->priv, flow, attr); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(counter_dev, attr->counter); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); + mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr); + } + + if (mlx5e_is_eswitch_flow(flow)) { + esw_attr = attr->esw_attr; + + if (esw_attr->int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv), + esw_attr->int_port); + + if (esw_attr->dest_int_port) + mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv), + esw_attr->dest_int_port); + } + + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); + + free_branch_attr(flow, attr->branch_true); + free_branch_attr(flow, attr->branch_false); +} + +static int +mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, + struct flow_cls_offload *f, unsigned long flow_flags, + struct mlx5e_tc_flow_parse_attr **__parse_attr, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr; + struct mlx5e_tc_flow *flow; + int err = -ENOMEM; + int out_index; + + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); + if (!parse_attr || !flow) + goto err_free; + + flow->flags = flow_flags; + flow->cookie = f->cookie; + flow->priv = priv; + + attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow)); + if (!attr) + goto err_free; + + flow->attr = attr; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + INIT_LIST_HEAD(&flow->encaps[out_index].list); + INIT_LIST_HEAD(&flow->hairpin); + INIT_LIST_HEAD(&flow->l3_to_l2_reformat); + INIT_LIST_HEAD(&flow->attrs); + INIT_LIST_HEAD(&flow->peer_flows); + refcount_set(&flow->refcnt, 1); + init_completion(&flow->init_done); + init_completion(&flow->del_hw_done); + + *__flow = flow; + *__parse_attr = parse_attr; + + return 0; + +err_free: + kfree(flow); + kvfree(parse_attr); + return err; +} + +static void +mlx5e_flow_attr_init(struct mlx5_flow_attr *attr, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct flow_cls_offload *f) +{ + attr->parse_attr = parse_attr; + attr->chain = f->common.chain_index; + attr->prio = f->common.prio; +} + +static void +mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr, + struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct flow_cls_offload *f, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + mlx5e_flow_attr_init(attr, parse_attr, f); + + esw_attr->in_rep = in_rep; + esw_attr->in_mdev = in_mdev; + + if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == + MLX5_COUNTER_SOURCE_ESWITCH) + esw_attr->counter_dev = in_mdev; + else + esw_attr->counter_dev = priv->mdev; +} + +static struct mlx5e_tc_flow * +__mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + unsigned long flow_flags, + struct net_device *filter_dev, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; + + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + attr_size = sizeof(struct mlx5_esw_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + parse_attr->filter_dev = filter_dev; + mlx5e_flow_esw_attr_init(flow->attr, + priv, parse_attr, + f, in_rep, in_mdev); + + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); + if (err) + goto err_free; + + /* actions validation depends on parsing the ct matches first */ + err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, + &flow->attr->ct_attr, extack); + if (err) + goto err_free; + + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); + complete_all(&flow->init_done); + if (err) { + if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) + goto err_free; + + add_unready_flow(flow); + } + + return flow; + +err_free: + mlx5e_flow_put(priv, flow); +out: + return ERR_PTR(err); +} + +static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, + struct mlx5e_tc_flow *flow, + unsigned long flow_flags, + struct mlx5_eswitch *peer_esw) +{ + struct mlx5e_priv *priv = flow->priv, *peer_priv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + int i = mlx5_get_dev_index(peer_esw->dev); + struct mlx5e_rep_priv *peer_urpriv; + struct mlx5e_tc_flow *peer_flow; + struct mlx5_core_dev *in_mdev; + int err = 0; + + peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); + peer_priv = netdev_priv(peer_urpriv->netdev); + + /* in_mdev is assigned of which the packet originated from. + * So packets redirected to uplink use the same mdev of the + * original flow and packets redirected from uplink use the + * peer mdev. + * In multiport eswitch it's a special case that we need to + * keep the original mdev. + */ + if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev)) + in_mdev = peer_priv->mdev; + else + in_mdev = priv->mdev; + + parse_attr = flow->attr->parse_attr; + peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, + parse_attr->filter_dev, + attr->in_rep, in_mdev); + if (IS_ERR(peer_flow)) { + err = PTR_ERR(peer_flow); + goto out; + } + + list_add_tail(&peer_flow->peer_flows, &flow->peer_flows); + flow_flag_set(flow, DUP); + mutex_lock(&esw->offloads.peer_mutex); + list_add_tail(&flow->peer[i], &esw->offloads.peer_flows[i]); + mutex_unlock(&esw->offloads.peer_mutex); + +out: + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + unsigned long flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5_devcom_comp_dev *devcom = priv->mdev->priv.eswitch->devcom, *pos; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *in_rep = rpriv->rep; + struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5_eswitch *peer_esw; + struct mlx5e_tc_flow *flow; + int err; + + flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (!is_peer_flow_needed(flow)) { + *__flow = flow; + return 0; + } + + if (!mlx5_devcom_for_each_peer_begin(devcom)) { + err = -ENODEV; + goto clean_flow; + } + + mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); + if (err) + goto peer_clean; + } + + mlx5_devcom_for_each_peer_end(devcom); + + *__flow = flow; + return 0; + +peer_clean: + mlx5e_tc_del_fdb_peers_flow(flow); + mlx5_devcom_for_each_peer_end(devcom); +clean_flow: + mlx5e_tc_del_fdb_flow(priv, flow); + return err; +} + +static int +mlx5e_add_nic_flow(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + unsigned long flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; + + if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + return -EOPNOTSUPP; + } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) { + return -EOPNOTSUPP; + } + + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); + attr_size = sizeof(struct mlx5_nic_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + parse_attr->filter_dev = filter_dev; + mlx5e_flow_attr_init(flow->attr, parse_attr, f); + + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); + if (err) + goto err_free; + + err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f, + &flow->attr->ct_attr, extack); + if (err) + goto err_free; + + err = parse_tc_nic_actions(priv, &rule->action, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_nic_flow(priv, flow, extack); + if (err) + goto err_free; + + flow_flag_set(flow, OFFLOADED); + *__flow = flow; + + return 0; + +err_free: + flow_flag_set(flow, FAILED); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); + mlx5e_flow_put(priv, flow); +out: + return err; +} + +static int +mlx5e_tc_add_flow(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + unsigned long flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned long flow_flags; + int err; + + get_flags(flags, &flow_flags); + + if (!tc_can_offload_extack(priv->netdev, f->common.extack)) + return -EOPNOTSUPP; + + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) + err = mlx5e_add_fdb_flow(priv, f, flow_flags, + filter_dev, flow); + else + err = mlx5e_add_nic_flow(priv, f, flow_flags, + filter_dev, flow); + + return err; +} + +static bool is_flow_rule_duplicate_allowed(struct net_device *dev, + struct mlx5e_rep_priv *rpriv) +{ + /* Offloaded flow rule is allowed to duplicate on non-uplink representor + * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this + * function is called from NIC mode. + */ + return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK; +} + +/* As IPsec and TC order is not aligned between software and hardware-offload, + * either IPsec offload or TC offload, not both, is allowed for a specific interface. + */ +static bool is_tc_ipsec_order_check_needed(struct net_device *filter, struct mlx5e_priv *priv) +{ + if (!IS_ENABLED(CONFIG_MLX5_EN_IPSEC)) + return false; + + if (filter != priv->netdev) + return false; + + if (mlx5e_eswitch_vf_rep(priv->netdev)) + return false; + + return true; +} + +static int mlx5e_tc_block_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!is_tc_ipsec_order_check_needed(filter, priv)) + return 0; + + if (mdev->num_block_tc) + return -EBUSY; + + mdev->num_block_ipsec++; + + return 0; +} + +static void mlx5e_tc_unblock_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv) +{ + if (!is_tc_ipsec_order_check_needed(filter, priv)) + return; + + priv->mdev->num_block_ipsec--; +} + +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_tc_flow *flow; + int err = 0; + + if (!mlx5_esw_hold(priv->mdev)) + return -EBUSY; + + err = mlx5e_tc_block_ipsec_offload(dev, priv); + if (err) + goto esw_release; + + mlx5_esw_get(priv->mdev); + + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + if (flow) { + /* Same flow rule offloaded to non-uplink representor sharing tc block, + * just return 0. + */ + if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev) + goto rcu_unlock; + + NL_SET_ERR_MSG_MOD(extack, + "flow cookie already exists, ignoring"); + netdev_warn_once(priv->netdev, + "flow cookie %lx already exists, ignoring\n", + f->cookie); + err = -EEXIST; + goto rcu_unlock; + } +rcu_unlock: + rcu_read_unlock(); + if (flow) + goto out; + + trace_mlx5e_configure_flower(f); + err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); + if (err) + goto out; + + /* Flow rule offloaded to non-uplink representor sharing tc block, + * set the flow's owner dev. + */ + if (is_flow_rule_duplicate_allowed(dev, rpriv)) + flow->orig_dev = dev; + + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); + if (err) + goto err_free; + + mlx5_esw_release(priv->mdev); + return 0; + +err_free: + mlx5e_flow_put(priv, flow); +out: + mlx5e_tc_unblock_ipsec_offload(dev, priv); + mlx5_esw_put(priv->mdev); +esw_release: + mlx5_esw_release(priv->mdev); + return err; +} + +static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) +{ + bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); + bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS)); + + return flow_flag_test(flow, INGRESS) == dir_ingress && + flow_flag_test(flow, EGRESS) == dir_egress; +} + +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags) +{ + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5e_tc_flow *flow; + int err; + + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + if (!flow || !same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag + * set. + */ + if (flow_flag_test_and_set(flow, DELETED)) { + err = -EINVAL; + goto errout; + } + rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); + rcu_read_unlock(); + + trace_mlx5e_delete_flower(f); + mlx5e_flow_put(priv, flow); + + mlx5e_tc_unblock_ipsec_offload(dev, priv); + mlx5_esw_put(priv->mdev); + return 0; + +errout: + rcu_read_unlock(); + return err; +} + +int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act) +{ + return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act); +} + +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + u64 lastuse = 0; + u64 packets = 0; + u64 bytes = 0; + int err = 0; + + rcu_read_lock(); + flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, + tc_ht_params)); + rcu_read_unlock(); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (!same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, USE_ACT_STATS)) { + f->use_act_stats = true; + } else { + counter = mlx5e_tc_get_counter(flow); + if (!counter) + goto errout; + + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + } + } + + /* Under multipath it's possible for one rule to be currently + * un-offloaded while the other rule is offloaded. + */ + if (esw && !mlx5_devcom_for_each_peer_begin(esw->devcom)) + goto out; + + if (flow_flag_test(flow, DUP)) { + struct mlx5e_tc_flow *peer_flow; + + list_for_each_entry(peer_flow, &flow->peer_flows, peer_flows) { + u64 packets2; + u64 lastuse2; + u64 bytes2; + + if (!flow_flag_test(peer_flow, OFFLOADED)) + continue; + if (flow_flag_test(flow, USE_ACT_STATS)) { + f->use_act_stats = true; + break; + } + + counter = mlx5e_tc_get_counter(peer_flow); + if (!counter) + goto no_peer_counter; + mlx5_fc_query_cached(counter, &bytes2, &packets2, + &lastuse2); + + bytes += bytes2; + packets += packets2; + lastuse = max_t(u64, lastuse, lastuse2); + } + } + +no_peer_counter: + if (esw) + mlx5_devcom_for_each_peer_end(esw->devcom); +out: + flow_stats_update(&f->stats, bytes, packets, 0, lastuse, + FLOW_ACTION_HW_STATS_DELAYED); + trace_mlx5e_stats_flower(f); +errout: + mlx5e_flow_put(priv, flow); + return err; +} + +static int apply_police_params(struct mlx5e_priv *priv, u64 rate, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch *esw; + u32 rate_mbps = 0; + u16 vport_num; + int err; + + vport_num = rpriv->rep->vport; + if (vport_num >= MLX5_VPORT_ECPF) { + NL_SET_ERR_MSG_MOD(extack, + "Ingress rate limit is supported only for Eswitch ports connected to VFs"); + return -EOPNOTSUPP; + } + + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. + * mbit means million bits. + * Moreover, if rate is non zero we choose to configure to a minimum of + * 1 mbit/sec. + */ + if (rate) { + rate = (rate * BITS_PER_BYTE) + 500000; + do_div(rate, 1000000); + rate_mbps = max_t(u32, rate, 1); + } + + err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); + + return err; +} + +static int +tc_matchall_police_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not continue"); + return -EOPNOTSUPP; + } + + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + const struct flow_action_entry *act; + int err; + int i; + + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); + return -EOPNOTSUPP; + } + + if (!flow_action_basic_hw_stats_check(flow_action, extack)) { + NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + err = tc_matchall_police_validate(flow_action, act, extack); + if (err) + return err; + + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); + if (err) + return err; + + mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats, + &priv->stats.rep_stats); + break; + default: + NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); + return -EOPNOTSUPP; + } + } + + return 0; +} + +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + if (ma->common.prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); + return -EINVAL; + } + + return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); +} + +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct netlink_ext_ack *extack = ma->common.extack; + + return apply_police_params(priv, 0, extack); +} + +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct rtnl_link_stats64 cur_stats; + u64 dbytes; + u64 dpkts; + + mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats); + dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + rpriv->prev_vf_vport_stats = cur_stats; + flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, + FLOW_ACTION_HW_STATS_DELAYED); +} + +static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, + struct mlx5e_priv *peer_priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_core_dev *peer_mdev = peer_priv->mdev; + struct mlx5e_hairpin_entry *hpe, *tmp; + LIST_HEAD(init_wait_list); + u16 peer_vhca_id; + int bkt; + + if (!mlx5e_same_hw_devs(priv, peer_priv)) + return; + + peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) + if (refcount_inc_not_zero(&hpe->refcnt)) + list_add(&hpe->dead_peer_wait_list, &init_wait_list); + mutex_unlock(&tc->hairpin_tbl_lock); + + list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { + wait_for_completion(&hpe->res_ready); + if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) + mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair); + + mlx5e_hairpin_put(priv, hpe); + } +} + +static int mlx5e_tc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_priv *peer_priv; + struct mlx5e_tc_table *tc; + struct mlx5e_priv *priv; + + if (ndev->netdev_ops != &mlx5e_netdev_ops || + event != NETDEV_UNREGISTER || + ndev->reg_state == NETREG_REGISTERED) + return NOTIFY_DONE; + + tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); + priv = tc->priv; + peer_priv = netdev_priv(ndev); + if (priv == peer_priv || + !(priv->netdev->features & NETIF_F_HW_TC)) + return NOTIFY_DONE; + + mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); + + return NOTIFY_DONE; +} + +static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_flow_table **ft = &tc->miss_t; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err = 0; + + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_TC_MISS_LEVEL; + ft_attr.prio = 0; + ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); + + *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(*ft)) { + err = PTR_ERR(*ft); + netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err); + } + + return err; +} + +static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + + mlx5_destroy_flow_table(tc->miss_t); +} + +int mlx5e_tc_nic_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_core_dev *dev = priv->mdev; + struct mapping_ctx *chains_mapping; + struct mlx5_chains_attr attr = {}; + u64 mapping_id; + int err; + + mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); + mutex_init(&tc->t_lock); + mutex_init(&tc->hairpin_tbl_lock); + hash_init(tc->hairpin_tbl); + tc->priv = priv; + + err = rhashtable_init(&tc->ht, &tc_ht_params); + if (err) + return err; + + lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); + lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0); + + mapping_id = mlx5_query_nic_system_image_guid(dev); + + chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + + if (IS_ERR(chains_mapping)) { + err = PTR_ERR(chains_mapping); + goto err_mapping; + } + tc->mapping = chains_mapping; + + err = mlx5e_tc_nic_create_miss_table(priv); + if (err) + goto err_chains; + + if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) + attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; + attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; + attr.default_ft = tc->miss_t; + attr.mapping = chains_mapping; + attr.fs_base_prio = MLX5E_TC_PRIO; + + tc->chains = mlx5_chains_create(dev, &attr); + if (IS_ERR(tc->chains)) { + err = PTR_ERR(tc->chains); + goto err_miss; + } + + mlx5_chains_print_info(tc->chains); + + tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); + tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, + MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); + + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; + err = register_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); + if (err) { + tc->netdevice_nb.notifier_call = NULL; + mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); + goto err_reg; + } + + mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs)); + + tc->action_stats_handle = mlx5e_tc_act_stats_create(); + if (IS_ERR(tc->action_stats_handle)) { + err = PTR_ERR(tc->action_stats_handle); + goto err_act_stats; + } + + return 0; + +err_act_stats: + unregister_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); +err_reg: + mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); + mlx5_chains_destroy(tc->chains); +err_miss: + mlx5e_tc_nic_destroy_miss_table(priv); +err_chains: + mapping_destroy(chains_mapping); +err_mapping: + rhashtable_destroy(&tc->ht); + return err; +} + +static void _mlx5e_tc_del_flow(void *ptr, void *arg) +{ + struct mlx5e_tc_flow *flow = ptr; + struct mlx5e_priv *priv = flow->priv; + + mlx5e_tc_del_flow(priv, flow); + kfree(flow); +} + +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + + debugfs_remove_recursive(tc->dfs_root); + + if (tc->netdevice_nb.notifier_call) + unregister_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); + + mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr); + mutex_destroy(&tc->hairpin_tbl_lock); + + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); + + if (!IS_ERR_OR_NULL(tc->t)) { + mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL); + tc->t = NULL; + } + mutex_destroy(&tc->t_lock); + + mlx5_tc_ct_clean(tc->ct); + mlx5e_tc_post_act_destroy(tc->post_act); + mapping_destroy(tc->mapping); + mlx5_chains_destroy(tc->chains); + mlx5e_tc_nic_destroy_miss_table(priv); + mlx5e_tc_act_stats_free(tc->action_stats_handle); +} + +int mlx5e_tc_ht_init(struct rhashtable *tc_ht) +{ + int err; + + err = rhashtable_init(tc_ht, &tc_ht_params); + if (err) + return err; + + lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); + lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0); + + return 0; +} + +void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) +{ + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); +} + +int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) +{ + const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); + struct netdev_phys_item_id ppid; + struct mlx5e_rep_priv *rpriv; + struct mapping_ctx *mapping; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + u64 mapping_id, key; + int err = 0; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw), + MLX5_FLOW_NAMESPACE_FDB); + uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev), + esw_chains(esw), + &esw->offloads.mod_hdr, + MLX5_FLOW_NAMESPACE_FDB, + uplink_priv->post_act); + + uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev)); + + uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act); + + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL, + sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_tun_mapping; + } + uplink_priv->tunnel_mapping = mapping; + + /* Two last values are reserved for stack devices slow path table mark + * and bridge ingress push mark. + */ + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, + sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto err_enc_opts_mapping; + } + uplink_priv->tunnel_enc_opts_mapping = mapping; + + uplink_priv->encap = mlx5e_tc_tun_init(priv); + if (IS_ERR(uplink_priv->encap)) { + err = PTR_ERR(uplink_priv->encap); + goto err_register_fib_notifier; + } + + uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create(); + if (IS_ERR(uplink_priv->action_stats_handle)) { + err = PTR_ERR(uplink_priv->action_stats_handle); + goto err_action_counter; + } + + err = dev_get_port_parent_id(priv->netdev, &ppid, false); + if (!err) { + memcpy(&key, &ppid.id, sizeof(key)); + mlx5_esw_offloads_devcom_init(esw, key); + } + + return 0; + +err_action_counter: + mlx5e_tc_tun_cleanup(uplink_priv->encap); +err_register_fib_notifier: + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); +err_enc_opts_mapping: + mapping_destroy(uplink_priv->tunnel_mapping); +err_tun_mapping: + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); + mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); + mlx5_tc_ct_clean(uplink_priv->ct_priv); + netdev_warn(priv->netdev, + "Failed to initialize tc (eswitch), err: %d", err); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); + return err; +} + +void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) +{ + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + mlx5_esw_offloads_devcom_cleanup(esw); + + mlx5e_tc_tun_cleanup(uplink_priv->encap); + + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); + mapping_destroy(uplink_priv->tunnel_mapping); + + mlx5e_tc_sample_cleanup(uplink_priv->tc_psample); + mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv); + mlx5_tc_ct_clean(uplink_priv->ct_priv); + mlx5e_flow_meters_cleanup(uplink_priv->flow_meters); + mlx5e_tc_post_act_destroy(uplink_priv->post_act); + mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle); +} + +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) +{ + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + + return atomic_read(&tc_ht->nelems); +} + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) +{ + struct mlx5e_tc_flow *flow, *tmp; + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (i == mlx5_get_dev_index(esw->dev)) + continue; + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i]) + mlx5e_tc_del_fdb_peers_flow(flow); + } +} + +void mlx5e_tc_reoffload_flows_work(struct work_struct *work) +{ + struct mlx5_rep_uplink_priv *rpriv = + container_of(work, struct mlx5_rep_uplink_priv, + reoffload_flows_work); + struct mlx5e_tc_flow *flow, *tmp; + + mutex_lock(&rpriv->unready_flows_lock); + list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { + if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) + unready_flow_del(flow); + } + mutex_unlock(&rpriv->unready_flows_lock); +} + +static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, + struct flow_cls_offload *cls_flower, + unsigned long flags) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_DESTROY: + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); + case FLOW_CLS_STATS: + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); + default: + return -EOPNOTSUPP; + } +} + +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + unsigned long flags = MLX5_TC_FLAG(INGRESS); + struct mlx5e_priv *priv = cb_priv; + + if (!priv->netdev || !netif_device_present(priv->netdev)) + return -EOPNOTSUPP; + + if (mlx5e_is_uplink_rep(priv)) + flags |= MLX5_TC_FLAG(ESW_OFFLOAD); + else + flags |= MLX5_TC_FLAG(NIC_OFFLOAD); + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); + default: + return -EOPNOTSUPP; + } +} + +static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct tunnel_match_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; + int err; + + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; + + if (!tun_id) + return true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); + return false; + } + + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); + return false; + } + } + + switch (key.enc_control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + break; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, 0, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + break; + default: + netdev_dbg(priv->netdev, + "Couldn't restore tunnel, unsupported addr_type: %d\n", + key.enc_control.addr_type); + return false; + } + + if (!tun_dst) { + netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); + return false; + } + + tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; + + if (enc_opts.key.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, + enc_opts.key.data, + enc_opts.key.len, + enc_opts.key.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } + + /* Set fwd_dev so we do dev_put() after datapath */ + tc_priv->fwd_dev = dev; + + skb->dev = dev; + + return true; +} + +static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id, + u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct tc_skb_ext *tc_skb_ext; + u64 act_miss_cookie; + u32 chain; + + chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0; + act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ? + mapped_obj->act_miss_cookie : 0; + if (chain || act_miss_cookie) { + if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id)) + return false; + + tc_skb_ext = tc_skb_ext_alloc(skb); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + + if (act_miss_cookie) { + tc_skb_ext->act_miss_cookie = act_miss_cookie; + tc_skb_ext->act_miss = 1; + } else { + tc_skb_ext->chain = chain; + } + } + + if (tc_priv) + return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); + + return true; +} + +static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv) +{ + if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { + netdev_dbg(priv->netdev, + "Failed to restore tunnel info for sampled packet\n"); + return; + } + mlx5e_tc_sample_skb(skb, mapped_obj); +} + +static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + bool forward_tx = false; + + /* Tunnel restore takes precedence over int port restore */ + if (tunnel_id) + return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, + mapped_obj->int_port_metadata, &forward_tx)) { + /* Set fwd_dev for future dev_put */ + tc_priv->fwd_dev = skb->dev; + tc_priv->forward_tx = forward_tx; + + return true; + } + + return false; +} + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mapping_ctx *mapping_ctx, u32 mapped_obj_id, + struct mlx5_tc_ct_priv *ct_priv, + u32 zone_restore_id, u32 tunnel_id, + struct mlx5e_tc_update_priv *tc_priv) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct mlx5_mapped_obj mapped_obj; + int err; + + err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj); + if (err) { + netdev_dbg(skb->dev, + "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n", + mapped_obj_id, err); + return false; + } + + switch (mapped_obj.type) { + case MLX5_MAPPED_OBJ_CHAIN: + case MLX5_MAPPED_OBJ_ACT_MISS: + return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id, + tunnel_id, tc_priv); + case MLX5_MAPPED_OBJ_SAMPLE: + mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv); + tc_priv->skb_done = true; + return true; + case MLX5_MAPPED_OBJ_INT_PORT_METADATA: + return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id); + default: + netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); + return false; + } + + return false; +} + +bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + u32 mapped_obj_id, reg_b, zone_restore_id; + struct mlx5_tc_ct_priv *ct_priv; + struct mapping_ctx *mapping_ctx; + struct mlx5e_tc_table *tc; + + reg_b = be32_to_cpu(cqe->ft_metadata); + tc = mlx5e_fs_get_tc(priv->fs); + mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & + ESW_ZONE_ID_MASK; + ct_priv = tc->ct; + mapping_ctx = tc->mapping; + + return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id, + 0, NULL); +} + +static struct mapping_ctx * +mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc; + struct mlx5_eswitch *esw; + struct mapping_ctx *ctx; + + if (is_mdev_switchdev_mode(priv->mdev)) { + esw = priv->mdev->priv.eswitch; + ctx = esw->offloads.reg_c0_obj_pool; + } else { + tc = mlx5e_fs_get_tc(priv->fs); + ctx = tc->mapping; + } + + return ctx; +} + +int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u64 act_miss_cookie, u32 *act_miss_mapping) +{ + struct mlx5_mapped_obj mapped_obj = {}; + struct mlx5_eswitch *esw; + struct mapping_ctx *ctx; + int err; + + ctx = mlx5e_get_priv_obj_mapping(priv); + mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS; + mapped_obj.act_miss_cookie = act_miss_cookie; + err = mapping_add(ctx, &mapped_obj, act_miss_mapping); + if (err) + return err; + + if (!is_mdev_switchdev_mode(priv->mdev)) + return 0; + + esw = priv->mdev->priv.eswitch; + attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); + if (IS_ERR(attr->act_id_restore_rule)) { + err = PTR_ERR(attr->act_id_restore_rule); + goto err_rule; + } + + return 0; + +err_rule: + mapping_remove(ctx, *act_miss_mapping); + return err; +} + +void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u32 act_miss_mapping) +{ + struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv); + + if (is_mdev_switchdev_mode(priv->mdev)) + mlx5_del_flow_rules(attr->act_id_restore_rule); + mapping_remove(ctx, act_miss_mapping); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h new file mode 100644 index 0000000000..adb39e30f9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_EN_TC_H__ +#define __MLX5_EN_TC_H__ + +#include +#include "en.h" +#include "eswitch.h" +#include "en/tc_ct.h" +#include "en/tc_tun.h" +#include "en/tc/int_port.h" +#include "en/tc/meter.h" +#include "en_rep.h" + +#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff + +#ifdef CONFIG_MLX5_ESWITCH + +#define NIC_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\ + sizeof(struct mlx5_nic_flow_attr)) +#define ESW_FLOW_ATTR_SZ (sizeof(struct mlx5_flow_attr) +\ + sizeof(struct mlx5_esw_flow_attr)) +#define ns_to_attr_sz(ns) (((ns) == MLX5_FLOW_NAMESPACE_FDB) ?\ + ESW_FLOW_ATTR_SZ :\ + NIC_FLOW_ATTR_SZ) + +struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc); +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); + +struct mlx5e_tc_update_priv { + struct net_device *fwd_dev; + bool skb_done; + bool forward_tx; +}; + +struct mlx5_nic_flow_attr { + u32 flow_tag; + u32 hairpin_tirn; + struct mlx5_flow_table *hairpin_ft; +}; + +struct mlx5_flow_attr { + u32 action; + unsigned long tc_act_cookies[TCA_ACT_MAX_PRIO]; + struct mlx5_fc *counter; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */ + struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */ + struct mlx5_ct_attr ct_attr; + struct mlx5e_sample_attr sample_attr; + struct mlx5e_meter_attr meter_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + u32 chain; + u16 prio; + u16 tc_act_cookies_count; + u32 dest_chain; + struct mlx5_flow_table *ft; + struct mlx5_flow_table *dest_ft; + u8 inner_match_level; + u8 outer_match_level; + u8 tun_ip_version; + int tunnel_id; /* mapped tunnel id */ + u32 flags; + u32 exe_aso_type; + struct list_head list; + struct mlx5e_post_act_handle *post_act_handle; + struct mlx5_flow_attr *branch_true; + struct mlx5_flow_attr *branch_false; + struct mlx5_flow_attr *jumping_attr; + struct mlx5_flow_handle *act_id_restore_rule; + /* keep this union last */ + union { + DECLARE_FLEX_ARRAY(struct mlx5_esw_flow_attr, esw_attr); + DECLARE_FLEX_ARRAY(struct mlx5_nic_flow_attr, nic_attr); + }; +}; + +enum { + MLX5_ATTR_FLAG_VLAN_HANDLED = BIT(0), + MLX5_ATTR_FLAG_SLOW_PATH = BIT(1), + MLX5_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ATTR_FLAG_SRC_REWRITE = BIT(3), + MLX5_ATTR_FLAG_SAMPLE = BIT(4), + MLX5_ATTR_FLAG_ACCEPT = BIT(5), + MLX5_ATTR_FLAG_CT = BIT(6), + MLX5_ATTR_FLAG_TERMINATING = BIT(7), + MLX5_ATTR_FLAG_MTU = BIT(8), +}; + +/* Returns true if any of the flags that require skipping further TC/NF processing are set. */ +static inline bool +mlx5e_tc_attr_flags_skip(u32 attr_flags) +{ + return attr_flags & (MLX5_ATTR_FLAG_SLOW_PATH | MLX5_ATTR_FLAG_ACCEPT); +} + +struct mlx5_rx_tun_attr { + u16 decap_vport; + union { + __be32 v4; + struct in6_addr v6; + } src_ip; /* Valid if decap_vport is not zero */ + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; /* Valid if decap_vport is not zero */ +}; + +#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16 +#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0) + +#define MLX5E_TC_MAX_INT_PORT_NUM (8) + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +struct tunnel_match_key { + struct flow_dissector_key_control enc_control; + struct flow_dissector_key_keyid enc_key_id; + struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_ip enc_ip; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; + + int filter_ifindex; +}; + +struct tunnel_match_enc_opts { + struct flow_dissector_key_enc_opts key; + struct flow_dissector_key_enc_opts mask; +}; + +/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS. + * Upper TUNNEL_INFO_BITS for general tunnel info. + * Lower ENC_OPTS_BITS bits for enc_opts. + */ +#define TUNNEL_INFO_BITS 12 +#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) +#define ENC_OPTS_BITS 11 +#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) +#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) +#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) + +enum { + MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, +}; + +#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT) + +int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv); +void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv); + +int mlx5e_tc_ht_init(struct rhashtable *tc_ht); +void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht); + +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags); +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags); + +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, + struct flow_cls_offload *f, unsigned long flags); +int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act); + +int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *f); +void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma); + +struct mlx5e_encap_entry; +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list); +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list); +bool mlx5e_encap_take(struct mlx5e_encap_entry *e); +void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); + +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list); +void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); + +struct mlx5e_neigh_hash_entry; +struct mlx5e_encap_entry * +mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e); +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); + +void mlx5e_tc_reoffload_flows_work(struct work_struct *work); + +enum mlx5e_tc_attr_to_reg { + MAPPED_OBJ_TO_REG, + VPORT_TO_REG, + TUNNEL_TO_REG, + CTSTATE_TO_REG, + ZONE_TO_REG, + ZONE_RESTORE_TO_REG, + MARK_TO_REG, + LABELS_TO_REG, + FTEID_TO_REG, + NIC_MAPPED_OBJ_TO_REG, + NIC_ZONE_RESTORE_TO_REG, + PACKET_COLOR_TO_REG, +}; + +struct mlx5e_tc_attr_to_reg_mapping { + int mfield; /* rewrite field */ + int moffset; /* bit offset of mfield */ + int mlen; /* bits to rewrite/match */ + + int soffset; /* byte offset of spec for match */ +}; + +extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; + +#define MLX5_REG_MAPPING_MOFFSET(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].moffset) +#define MLX5_REG_MAPPING_MBITS(reg_id) (mlx5e_tc_attr_to_reg_mappings[reg_id].mlen) +#define MLX5_REG_MAPPING_MASK(reg_id) (GENMASK(mlx5e_tc_attr_to_reg_mappings[reg_id].mlen - 1, 0)) + +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev); + +int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + int act_id, u32 data); + +void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 data, + u32 mask); + +void mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, + enum mlx5e_tc_attr_to_reg type, + u32 *data, + u32 *mask); + +int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, + struct flow_match_basic *match, bool outer, + void *headers_c, void *headers_v); + +int mlx5e_tc_nic_init(struct mlx5e_priv *priv); +void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); + +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv); + +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5_flow_handle * +mlx5_tc_rule_insert(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void +mlx5_tc_rule_delete(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev); +int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, + u16 *vport); + +int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + int ifindex, + enum mlx5e_tc_int_port_type type, + u32 *action, + int out_index); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_ht_init(struct rhashtable *tc_ht) { return 0; } +static inline void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) {} +static inline int +mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ return -EOPNOTSUPP; } + +#endif /* CONFIG_MLX5_CLS_ACT */ + +struct mlx5_flow_attr *mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type); + +struct mlx5_flow_handle * +mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, + unsigned long flags) +{ + return 0; +} + +static inline int +mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ return -EOPNOTSUPP; } +#endif + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +struct mlx5e_tc_table *mlx5e_tc_table_alloc(void); +void mlx5e_tc_table_free(struct mlx5e_tc_table *tc); +static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) +{ + u32 chain, reg_b; + + reg_b = be32_to_cpu(cqe->ft_metadata); + + if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ESW_ZONE_ID_BITS)) + return false; + + chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + if (chain) + return true; + + return false; +} + +bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mapping_ctx *mapping_ctx, u32 mapped_obj_id, + struct mlx5_tc_ct_priv *ct_priv, + u32 zone_restore_id, u32 tunnel_id, + struct mlx5e_tc_update_priv *tc_priv); +#else /* CONFIG_MLX5_CLS_ACT */ +static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; } +static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {} +static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) +{ return false; } +static inline bool +mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +{ return true; } +#endif + +int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u64 act_miss_cookie, u32 *act_miss_mapping); +void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u32 act_miss_mapping); + +#endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c new file mode 100644 index 0000000000..f0b506e562 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -0,0 +1,1051 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "en.h" +#include "en/txrx.h" +#include "ipoib/ipoib.h" +#include "en_accel/en_accel.h" +#include "en_accel/ipsec_rxtx.h" +#include "en_accel/macsec.h" +#include "en/ptp.h" +#include + +static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) +{ + int i; + + for (i = 0; i < num_dma; i++) { + struct mlx5e_sq_dma *last_pushed_dma = + mlx5e_dma_get(sq, --sq->dma_fifo_pc); + + mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma); + } +} + +static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) +{ +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) + + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); +} + +static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) +{ + if (skb_transport_header_was_set(skb)) + return skb_transport_offset(skb); + else + return mlx5e_skb_l2_header_offset(skb); +} + +static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, + struct sk_buff *skb) +{ + u16 hlen; + + switch (mode) { + case MLX5_INLINE_MODE_NONE: + return 0; + case MLX5_INLINE_MODE_TCP_UDP: + hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); + if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) + hlen += VLAN_HLEN; + break; + case MLX5_INLINE_MODE_IP: + hlen = mlx5e_skb_l3_header_offset(skb); + break; + case MLX5_INLINE_MODE_L2: + default: + hlen = mlx5e_skb_l2_header_offset(skb); + } + return min_t(u16, hlen, skb_headlen(skb)); +} + +#define MLX5_UNSAFE_MEMCPY_DISCLAIMER \ + "This copy has been bounds-checked earlier in " \ + "mlx5i_sq_calc_wqe_attr() and intentionally " \ + "crosses a flex array boundary. Since it is " \ + "performance sensitive, splitting the copy is " \ + "undesirable." + +static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) +{ + struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; + int cpy1_sz = 2 * ETH_ALEN; + int cpy2_sz = ihs - cpy1_sz; + + memcpy(&vhdr->addrs, skb->data, cpy1_sz); + vhdr->h_vlan_proto = skb->vlan_proto; + vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); + unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto, + skb->data + cpy1_sz, + cpy2_sz, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); +} + +static inline void +mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel, + struct mlx5_wqe_eth_seg *eseg) +{ + if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg))) + return; + + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + if (skb->encapsulation) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM; + sq->stats->csum_partial_inner++; + } else { + eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial++; + } +#ifdef CONFIG_MLX5_EN_TLS + } else if (unlikely(accel && accel->tls.tls_tisn)) { + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + sq->stats->csum_partial++; +#endif + } else + sq->stats->csum_none++; +} + +/* Returns the number of header bytes that we plan + * to inline later in the transmit descriptor + */ +static inline u16 +mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop) +{ + struct mlx5e_sq_stats *stats = sq->stats; + u16 ihs; + + *hopbyhop = 0; + if (skb->encapsulation) { + ihs = skb_inner_tcp_all_headers(skb); + stats->tso_inner_packets++; + stats->tso_inner_bytes += skb->len - ihs; + } else { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + ihs = skb_transport_offset(skb) + sizeof(struct udphdr); + } else { + ihs = skb_tcp_all_headers(skb); + if (ipv6_has_hopopt_jumbo(skb)) { + *hopbyhop = sizeof(struct hop_jumbo_hdr); + ihs -= sizeof(struct hop_jumbo_hdr); + } + } + stats->tso_packets++; + stats->tso_bytes += skb->len - ihs - *hopbyhop; + } + + return ihs; +} + +static inline int +mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, + unsigned char *skb_data, u16 headlen, + struct mlx5_wqe_data_seg *dseg) +{ + dma_addr_t dma_addr = 0; + u8 num_dma = 0; + int i; + + if (headlen) { + dma_addr = dma_map_single(sq->pdev, skb_data, headlen, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(headlen); + + mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); + num_dma++; + dseg++; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + int fsz = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + num_dma++; + dseg++; + } + + return num_dma; + +dma_unmap_wqe_err: + mlx5e_dma_unmap_wqe_err(sq, num_dma); + return -ENOMEM; +} + +struct mlx5e_tx_attr { + u32 num_bytes; + u16 headlen; + u16 ihs; + __be16 mss; + u16 insz; + u8 opcode; + u8 hopbyhop; +}; + +struct mlx5e_tx_wqe_attr { + u16 ds_cnt; + u16 ds_cnt_inl; + u16 ds_cnt_ids; + u8 num_wqebbs; +}; + +static u8 +mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel) +{ + u8 mode; + +#ifdef CONFIG_MLX5_EN_TLS + if (accel && accel->tls.tls_tisn) + return MLX5_INLINE_MODE_TCP_UDP; +#endif + + mode = sq->min_inline_mode; + + if (skb_vlan_tag_present(skb) && + test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); + + return mode; +} + +static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel, + struct mlx5e_tx_attr *attr) +{ + struct mlx5e_sq_stats *stats = sq->stats; + + if (skb_is_gso(skb)) { + int hopbyhop; + u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop); + + *attr = (struct mlx5e_tx_attr) { + .opcode = MLX5_OPCODE_LSO, + .mss = cpu_to_be16(skb_shinfo(skb)->gso_size), + .ihs = ihs, + .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs, + .headlen = skb_headlen(skb) - ihs - hopbyhop, + .hopbyhop = hopbyhop, + }; + + stats->packets += skb_shinfo(skb)->gso_segs; + } else { + u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel); + u16 ihs = mlx5e_calc_min_inline(mode, skb); + + *attr = (struct mlx5e_tx_attr) { + .opcode = MLX5_OPCODE_SEND, + .mss = cpu_to_be16(0), + .ihs = ihs, + .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN), + .headlen = skb_headlen(skb) - ihs, + }; + + stats->packets++; + } + + attr->insz = mlx5e_accel_tx_ids_len(sq, accel); + stats->bytes += attr->num_bytes; +} + +static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr, + struct mlx5e_tx_wqe_attr *wqe_attr) +{ + u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT; + u16 ds_cnt_inl = 0; + u16 ds_cnt_ids = 0; + + /* Sync the calculation with MLX5E_MAX_TX_WQEBBS. */ + + if (attr->insz) + ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz, + MLX5_SEND_WQE_DS); + + ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids; + if (attr->ihs) { + u16 inl = attr->ihs - INL_HDR_START_SZ; + + if (skb_vlan_tag_present(skb)) + inl += VLAN_HLEN; + + ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + if (WARN_ON_ONCE(ds_cnt_inl > MLX5E_MAX_TX_INLINE_DS)) + netdev_warn(skb->dev, "ds_cnt_inl = %u > max %u\n", ds_cnt_inl, + (u16)MLX5E_MAX_TX_INLINE_DS); + ds_cnt += ds_cnt_inl; + } + + *wqe_attr = (struct mlx5e_tx_wqe_attr) { + .ds_cnt = ds_cnt, + .ds_cnt_inl = ds_cnt_inl, + .ds_cnt_ids = ds_cnt_ids, + .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), + }; +} + +static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +} + +static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) { + netif_tx_stop_queue(sq->txq); + sq->stats->stopped++; + } +} + +static void mlx5e_tx_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + /* Must not be called when a MPWQE session is active but empty. */ + mlx5e_tx_mpwqe_ensure_complete(sq); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); +} + +static inline void +mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, + const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma, + struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg, + struct mlx5_wqe_eth_seg *eseg, bool xmit_more) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + bool send_doorbell; + + *wi = (struct mlx5e_tx_wqe_info) { + .skb = skb, + .num_bytes = attr->num_bytes, + .num_dma = num_dma, + .num_wqebbs = wqe_attr->num_wqebbs, + .num_fifo_pkts = 0, + }; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt); + + mlx5e_tx_skb_update_hwts_flags(skb); + + sq->pc += wi->num_wqebbs; + + mlx5e_tx_check_stop(sq); + + if (unlikely(sq->ptpsq && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) { + u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata); + + mlx5e_skb_cb_hwtstamp_init(skb); + mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, + metadata_index); + mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); + if (!netif_tx_queue_stopped(sq->txq) && + mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { + netif_tx_stop_queue(sq->txq); + sq->stats->stopped++; + } + skb_get(skb); + } + + send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more); + if (send_doorbell) + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); +} + +static void +mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr, + struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe_info *wi; + u16 ihs = attr->ihs; + struct ipv6hdr *h6; + struct mlx5e_sq_stats *stats = sq->stats; + int num_dma; + + stats->xmit_more += xmit_more; + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + eseg->mss = attr->mss; + + if (ihs) { + u8 *start = eseg->inline_hdr.start; + + if (unlikely(attr->hopbyhop)) { + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6)); + ihs += VLAN_HLEN; + h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr)); + } else { + unsafe_memcpy(start, skb->data, + ETH_HLEN + sizeof(*h6), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + h6 = (struct ipv6hdr *)(start + ETH_HLEN); + } + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb)); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */ + } else if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(start, skb, ihs); + ihs += VLAN_HLEN; + stats->added_vlan_packets++; + } else { + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + attr->ihs, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + } + eseg->inline_hdr.sz |= cpu_to_be16(ihs); + dseg += wqe_attr->ds_cnt_inl; + } else if (skb_vlan_tag_present(skb)) { + eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) + eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); + eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + stats->added_vlan_packets++; + } + + dseg += wqe_attr->ds_cnt_ids; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop, + attr->headlen, dseg); + if (unlikely(num_dma < 0)) + goto err_drop; + + mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, eseg, xmit_more); + + return; + +err_drop: + stats->dropped++; + if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) + mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist, + be32_to_cpu(eseg->flow_table_metadata)); + dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); +} + +static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) +{ + return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs && + !attr->insz && !mlx5e_macsec_skb_is_offload(skb); +} + +static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + + /* Assumes the session is already running and has at least one packet. */ + return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); +} + +static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, + struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + net_prefetchw(wqe->data); + + *session = (struct mlx5e_tx_mpwqe) { + .wqe = wqe, + .bytes_count = 0, + .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .pkt_count = 0, + .inline_on = 0, + }; + + memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN); + + sq->stats->mpwqe_blks++; +} + +static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq) +{ + return sq->mpwqe.wqe; +} + +static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg; + + dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; + + session->pkt_count++; + session->bytes_count += txd->len; + + dseg->addr = cpu_to_be64(txd->dma_addr); + dseg->byte_count = cpu_to_be32(txd->len); + dseg->lkey = sq->mkey_be; + session->ds_count++; + + sq->stats->mpwqe_pkts++; +} + +static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_mpwqe *session = &sq->mpwqe; + u8 ds_count = session->ds_count; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_tx_wqe_info *wi; + u16 pi; + + cseg = &session->wqe->ctrl; + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + *wi = (struct mlx5e_tx_wqe_info) { + .skb = NULL, + .num_bytes = session->bytes_count, + .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS), + .num_dma = session->pkt_count, + .num_fifo_pkts = session->pkt_count, + }; + + sq->pc += wi->num_wqebbs; + + session->wqe = NULL; + + mlx5e_tx_check_stop(sq); + + return cseg; +} + +static void +mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, bool xmit_more) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_xmit_data txd; + + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { + mlx5e_tx_mpwqe_session_start(sq, eseg); + } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { + mlx5e_tx_mpwqe_session_complete(sq); + mlx5e_tx_mpwqe_session_start(sq, eseg); + } + + sq->stats->xmit_more += xmit_more; + + mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); + mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); + mlx5e_tx_mpwqe_add_dseg(sq, &txd); + mlx5e_tx_skb_update_hwts_flags(skb); + + if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { + /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ + cseg = mlx5e_tx_mpwqe_session_complete(sq); + + if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) { + /* Might stop the queue, but we were asked to ring the doorbell anyway. */ + cseg = mlx5e_tx_mpwqe_session_complete(sq); + + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + } + + return; + +err_unmap: + mlx5e_dma_unmap_wqe_err(sq, 1); + sq->stats->dropped++; + dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); +} + +void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) +{ + /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */ + if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq))) + mlx5e_tx_mpwqe_session_complete(sq); +} + +static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + eseg->flow_table_metadata = + cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist)); +} + +static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, + struct mlx5_wqe_eth_seg *eseg, u16 ihs) +{ + mlx5e_accel_tx_eseg(priv, skb, eseg, ihs); + mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); + if (unlikely(sq->ptpsq)) + mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg); +} + +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_accel_tx_state accel = {}; + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; + struct mlx5e_tx_wqe *wqe; + struct mlx5e_txqsq *sq; + u16 pi; + + /* All changes to txq2sq are performed in sync with mlx5e_xmit, when the + * queue being changed is disabled, and smp_wmb guarantees that the + * changes are visible before mlx5e_xmit tries to read from txq2sq. It + * guarantees that the value of txq2sq[qid] doesn't change while + * mlx5e_xmit is running on queue number qid. smb_wmb is paired with + * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE. + */ + sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + if (unlikely(!sq)) { + /* Two cases when sq can be NULL: + * 1. The HTB node is registered, and mlx5e_select_queue + * selected its queue ID, but the SQ itself is not yet created. + * 2. HTB SQ creation failed. Similar to the previous case, but + * the SQ won't be created. + */ + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* May send SKBs and WQEs. */ + if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) + return NETDEV_TX_OK; + + mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr); + + if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) { + if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { + struct mlx5_wqe_eth_seg eseg = {}; + + mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs); + mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); + return NETDEV_TX_OK; + } + + mlx5e_tx_mpwqe_ensure_complete(sq); + } + + mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5E_TX_FETCH_WQE(sq, pi); + + /* May update the WQE, but may not post other WQEs. */ + mlx5e_accel_tx_finish(sq, wqe, &accel, + (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); + mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs); + mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); + + return NETDEV_TX_OK; +} + +static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, + u32 *dma_fifo_cc) +{ + int i; + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } +} + +static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_cqe64 *cqe, int napi_budget) +{ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct skb_shared_hwtstamps hwts = {}; + u64 ts = get_cqe_ts(cqe); + + hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts); + if (sq->ptpsq) + mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP, + hwts.hwtstamp, sq->ptpsq->cq_stats); + else + skb_tstamp_tx(skb, &hwts); + } + + napi_consume_skb(skb, napi_budget); +} + +static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, + struct mlx5_cqe64 *cqe, int napi_budget) +{ + int i; + + for (i = 0; i < wi->num_fifo_pkts; i++) { + struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo); + + mlx5e_consume_skb(sq, skb, cqe, napi_budget); + } +} + +void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq) +{ + if (netif_tx_queue_stopped(sq->txq) && + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && + !mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq) && + !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { + netif_tx_wake_queue(sq->txq); + sq->stats->wake++; + } +} + +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) +{ + struct mlx5e_sq_stats *stats; + struct mlx5e_txqsq *sq; + struct mlx5_cqe64 *cqe; + u32 dma_fifo_cc; + u32 nbytes; + u16 npkts; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_txqsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return false; + + stats = sq->stats; + + npkts = 0; + nbytes = 0; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + /* avoid dirtying sq cache line every cqe */ + dma_fifo_cc = sq->dma_fifo_cc; + + i = 0; + do { + struct mlx5e_tx_wqe_info *wi; + u16 wqe_counter; + bool last_wqe; + u16 ci; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + last_wqe = (sqcc == wqe_counter); + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + if (likely(wi->skb)) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget); + + npkts++; + nbytes += wi->num_bytes; + continue; + } + + if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, + &dma_fifo_cc))) + continue; + + if (wi->num_fifo_pkts) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget); + + npkts += wi->num_fifo_pkts; + nbytes += wi->num_bytes; + } + } while (!last_wqe); + + if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, + &sq->state)) { + mlx5e_dump_error_cqe(&sq->cq, sq->sqn, + (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); + queue_work(cq->priv->wq, &sq->recover_work); + } + stats->cqe_err++; + } + + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + + stats->cqes += i; + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); + + mlx5e_txqsq_wake(sq); + + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi) +{ + int i; + + for (i = 0; i < wi->num_fifo_pkts; i++) + dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo)); +} + +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + u32 dma_fifo_cc, nbytes = 0; + u16 ci, sqcc, npkts = 0; + + sqcc = sq->cc; + dma_fifo_cc = sq->dma_fifo_cc; + + while (sqcc != sq->pc) { + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + if (likely(wi->skb)) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + dev_kfree_skb_any(wi->skb); + + npkts++; + nbytes += wi->num_bytes; + continue; + } + + if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc))) + continue; + + if (wi->num_fifo_pkts) { + mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); + mlx5e_tx_wi_kfree_fifo_skbs(sq, wi); + + npkts += wi->num_fifo_pkts; + nbytes += wi->num_bytes; + } + } + + sq->dma_fifo_cc = dma_fifo_cc; + sq->cc = sqcc; + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); +} + +#ifdef CONFIG_MLX5_CORE_IPOIB +static inline void +mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, + struct mlx5_wqe_datagram_seg *dseg) +{ + memcpy(&dseg->av, av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); +} + +static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb, + const struct mlx5e_tx_attr *attr, + struct mlx5e_tx_wqe_attr *wqe_attr) +{ + u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS; + u16 ds_cnt_inl = 0; + + ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags; + + if (attr->ihs) { + u16 inl = attr->ihs - INL_HDR_START_SZ; + + ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + } + + *wqe_attr = (struct mlx5e_tx_wqe_attr) { + .ds_cnt = ds_cnt, + .ds_cnt_inl = ds_cnt_inl, + .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS), + }; +} + +void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more) +{ + struct mlx5e_tx_wqe_attr wqe_attr; + struct mlx5e_tx_attr attr; + struct mlx5i_tx_wqe *wqe; + + struct mlx5_wqe_datagram_seg *datagram; + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe_info *wi; + + struct mlx5e_sq_stats *stats = sq->stats; + int num_dma; + u16 pi; + + mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr); + + pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); + wqe = MLX5I_SQ_FETCH_WQE(sq, pi); + + stats->xmit_more += xmit_more; + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; + cseg = &wqe->ctrl; + datagram = &wqe->datagram; + eseg = &wqe->eth; + dseg = wqe->data; + + mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); + + mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg); + + eseg->mss = attr.mss; + + if (attr.ihs) { + if (unlikely(attr.hopbyhop)) { + struct ipv6hdr *h6; + + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + ETH_HLEN + sizeof(*h6), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN); + h6->nexthdr = IPPROTO_TCP; + /* Copy the TCP header after the IPv6 one */ + unsafe_memcpy(h6 + 1, + skb->data + ETH_HLEN + sizeof(*h6) + + sizeof(struct hop_jumbo_hdr), + tcp_hdrlen(skb), + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + /* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */ + } else { + unsafe_memcpy(eseg->inline_hdr.start, skb->data, + attr.ihs, + MLX5_UNSAFE_MEMCPY_DISCLAIMER); + } + eseg->inline_hdr.sz = cpu_to_be16(attr.ihs); + dseg += wqe_attr.ds_cnt_inl; + } + + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop, + attr.headlen, dseg); + if (unlikely(num_dma < 0)) + goto err_drop; + + mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, eseg, xmit_more); + + return; + +err_drop: + stats->dropped++; + dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c new file mode 100644 index 0000000000..a7d9b7cb42 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "en.h" +#include "en/txrx.h" +#include "en/xdp.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" +#include "en_accel/ktls_txrx.h" + +static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) +{ + int current_cpu = smp_processor_id(); + + return cpumask_test_cpu(current_cpu, c->aff_mask); +} + +static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) +{ + struct mlx5e_sq_stats *stats = sq->stats; + struct dim_sample dim_sample = {}; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_DIM, &sq->state))) + return; + + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); + net_dim(&sq->dim, dim_sample); +} + +static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) +{ + struct mlx5e_rq_stats *stats = rq->stats; + struct dim_sample dim_sample = {}; + + if (unlikely(!test_bit(MLX5E_RQ_STATE_DIM, &rq->state))) + return; + + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); + net_dim(&rq->dim, dim_sample); +} + +void mlx5e_trigger_irq(struct mlx5e_icosq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *nopwqe; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_NOP, + .num_wqebbs = 1, + }; + + nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); +} + +static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq) +{ + bool need_wakeup = xsk_uses_need_wakeup(xskrq->xsk_pool); + bool busy_xsk = false, xsk_rx_alloc_err; + + /* If SQ is empty, there are no TX completions to trigger NAPI, so set + * need_wakeup. Do it before queuing packets for TX to avoid race + * condition with userspace. + */ + if (need_wakeup && xsksq->pc == xsksq->cc) + xsk_set_tx_need_wakeup(xsksq->xsk_pool); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + /* If we queued some packets for TX, no need for wakeup anymore. */ + if (need_wakeup && xsksq->pc != xsksq->cc) + xsk_clear_tx_need_wakeup(xsksq->xsk_pool); + + /* If WQ is empty, RX won't trigger NAPI, so set need_wakeup. Do it + * before refilling to avoid race condition with userspace. + */ + if (need_wakeup && !mlx5e_rqwq_get_cur_sz(xskrq)) + xsk_set_rx_need_wakeup(xskrq->xsk_pool); + xsk_rx_alloc_err = INDIRECT_CALL_2(xskrq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + xskrq); + /* Ask for wakeup if WQ is not full after refill. */ + if (!need_wakeup) + busy_xsk |= xsk_rx_alloc_err; + else if (xsk_rx_alloc_err) + xsk_set_rx_need_wakeup(xskrq->xsk_pool); + else + xsk_clear_rx_need_wakeup(xskrq->xsk_pool); + + return busy_xsk; +} + +int mlx5e_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, + napi); + struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_rq *xskrq = &c->xskrq; + struct mlx5e_rq *rq = &c->rq; + bool aff_change = false; + bool busy_xsk = false; + bool busy = false; + int work_done = 0; + u16 qos_sqs_size; + bool xsk_open; + int i; + + rcu_read_lock(); + + qos_sqs = rcu_dereference(c->qos_sqs); + + xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + ch_stats->poll++; + + for (i = 0; i < c->num_tc; i++) + busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + + if (unlikely(qos_sqs)) { + smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */ + qos_sqs_size = READ_ONCE(c->qos_sqs_size); + + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) + busy |= mlx5e_poll_tx_cq(&sq->cq, budget); + } + } + + /* budget=0 means we may be in IRQ context, do as little as possible */ + if (unlikely(!budget)) + goto out; + + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); + + if (c->xdp) + busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); + + if (xsk_open) + work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + + if (likely(budget - work_done)) + work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + + busy |= work_done == budget; + + mlx5e_poll_ico_cq(&c->icosq.cq); + if (mlx5e_poll_ico_cq(&c->async_icosq.cq)) + /* Don't clear the flag if nothing was polled to prevent + * queueing more WQEs and overflowing the async ICOSQ. + */ + clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state); + + /* Keep after async ICOSQ CQ poll */ + if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget))) + busy |= mlx5e_ktls_rx_handle_resync_list(c, budget); + + busy |= INDIRECT_CALL_2(rq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + rq); + if (xsk_open) { + busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); + busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq); + } + + busy |= busy_xsk; + + if (busy) { + if (likely(mlx5e_channel_no_affinity_change(c))) { + work_done = budget; + goto out; + } + ch_stats->aff_change++; + aff_change = true; + if (work_done == budget) + work_done--; + } + + if (unlikely(!napi_complete_done(napi, work_done))) + goto out; + + ch_stats->arm++; + + for (i = 0; i < c->num_tc; i++) { + mlx5e_handle_tx_dim(&c->sq[i]); + mlx5e_cq_arm(&c->sq[i].cq); + } + if (unlikely(qos_sqs)) { + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) { + mlx5e_handle_tx_dim(sq); + mlx5e_cq_arm(&sq->cq); + } + } + } + + mlx5e_handle_rx_dim(rq); + + mlx5e_cq_arm(&rq->cq); + mlx5e_cq_arm(&c->icosq.cq); + mlx5e_cq_arm(&c->async_icosq.cq); + mlx5e_cq_arm(&c->xdpsq.cq); + + if (xsk_open) { + mlx5e_handle_rx_dim(xskrq); + mlx5e_cq_arm(&xsksq->cq); + mlx5e_cq_arm(&xskrq->cq); + } + + if (unlikely(aff_change && busy_xsk)) { + mlx5e_trigger_irq(&c->icosq); + ch_stats->force_irq++; + } + +out: + rcu_read_unlock(); + + return work_done; +} + +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) +{ + struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + + napi_schedule(cq->napi); + cq->event_ctr++; + cq->ch_stats->events++; +} + +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event) +{ + struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + struct net_device *netdev = cq->netdev; + + netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n", + __func__, mcq->cqn, event); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c new file mode 100644 index 0000000000..40a6cb052a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -0,0 +1,1261 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_RFS_ACCEL +#include +#endif +#include "mlx5_core.h" +#include "lib/eq.h" +#include "fpga/core.h" +#include "eswitch.h" +#include "lib/clock.h" +#include "diag/fw_tracer.h" +#include "mlx5_irq.h" +#include "pci_irq.h" +#include "devlink.h" +#include "en_accel/ipsec.h" + +enum { + MLX5_EQE_OWNER_INIT_VAL = 0x1, +}; + +enum { + MLX5_EQ_STATE_ARMED = 0x9, + MLX5_EQ_STATE_FIRED = 0xa, + MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, +}; + +enum { + MLX5_EQ_DOORBEL_OFFSET = 0x40, +}; + +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. + */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, +}; + +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + +struct mlx5_eq_table { + struct xarray comp_eqs; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; + + struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; + + /* Since CQ DB is stored in async_eq */ + struct mlx5_nb cq_err_nb; + + struct mutex lock; /* sync async eqs creations */ + struct mutex comp_lock; /* sync comp eqs creations */ + int curr_comp_eqs; + int max_comp_eqs; + struct mlx5_irq_table *irq_table; + struct xarray comp_irqs; + struct mlx5_irq *ctrl_irq; + struct cpu_rmap *rmap; + struct cpumask used_cpus; +}; + +#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ + (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ + (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ + (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) + +static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {}; + + MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ); + MLX5_SET(destroy_eq_in, in, eq_number, eqn); + return mlx5_cmd_exec_in(dev, destroy_eq, in); +} + +/* caller must eventually call mlx5_cq_put on the returned cq */ +static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) +{ + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *cq = NULL; + + rcu_read_lock(); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + mlx5_cq_hold(cq); + rcu_read_unlock(); + + return cq; +} + +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) +{ + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; + struct mlx5_eqe *eqe; + int num_eqes = 0; + u32 cqn = -1; + + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { + struct mlx5_core_cq *cq; + + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */ + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + + cq = mlx5_eq_cq_get(eq, cqn); + if (likely(cq)) { + ++cq->arm_sn; + cq->comp(cq, eqe); + mlx5_cq_put(cq); + } else { + dev_dbg_ratelimited(eq->dev->device, + "Completion event for bogus CQ 0x%x\n", cqn); + } + + ++eq->cons_index; + + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); + +out: + eq_update_ci(eq, 1); + + if (cqn != -1) + tasklet_schedule(&eq_comp->tasklet_ctx.task); + + return 0; +} + +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. + */ +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) +{ + u32 count_eqe; + + disable_irq(eq->core.irqn); + count_eqe = eq->core.cons_index; + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); + count_eqe = eq->core.cons_index - count_eqe; + enable_irq(eq->core.irqn); + + return count_eqe; +} + +static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery, + unsigned long *flags) + __acquires(&eq->lock) +{ + if (!recovery) + spin_lock(&eq->lock); + else + spin_lock_irqsave(&eq->lock, *flags); +} + +static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery, + unsigned long *flags) + __releases(&eq->lock) +{ + if (!recovery) + spin_unlock(&eq->lock); + else + spin_unlock_irqrestore(&eq->lock, *flags); +} + +enum async_eq_nb_action { + ASYNC_EQ_IRQ_HANDLER = 0, + ASYNC_EQ_RECOVER = 1, +}; + +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; + struct mlx5_eq_table *eqt; + struct mlx5_core_dev *dev; + struct mlx5_eqe *eqe; + unsigned long flags; + int num_eqes = 0; + bool recovery; + + dev = eq->dev; + eqt = dev->priv.eq_table; + + recovery = action == ASYNC_EQ_RECOVER; + mlx5_eq_async_int_lock(eq_async, recovery, &flags); + + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); + atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); + + ++eq->cons_index; + + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); + +out: + eq_update_ci(eq, 1); + mlx5_eq_async_int_unlock(eq_async, recovery, &flags); + + return unlikely(recovery) ? num_eqes : 0; +} + +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq; + int eqes; + + eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL); + if (eqes) + mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes); +} + +static void init_eq_buf(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe; + int i; + + for (i = 0; i < eq_get_size(eq); i++) { + eqe = get_eqe(eq, i); + eqe->owner = MLX5_EQE_OWNER_INIT_VAL; + } +} + +static int +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct mlx5_eq_param *param) +{ + u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE); + struct mlx5_cq_table *cq_table = &eq->cq_table; + u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; + u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); + struct mlx5_priv *priv = &dev->priv; + __be64 *pas; + u16 vecidx; + void *eqc; + int inlen; + u32 *in; + int err; + int i; + + /* Init CQ table */ + memset(cq_table, 0, sizeof(*cq_table)); + spin_lock_init(&cq_table->lock); + INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + + eq->cons_index = 0; + + err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride), + &eq->frag_buf, dev->priv.numa_node); + if (err) + return err; + + mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); + init_eq_buf(eq); + + eq->irq = param->irq; + vecidx = mlx5_irq_get_index(eq->irq); + + inlen = MLX5_ST_SZ_BYTES(create_eq_in) + + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_buf; + } + + pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); + mlx5_fill_page_frag_array(&eq->frag_buf, pas); + + MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); + if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) + MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID); + + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i, + param->mask[i]); + + eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); + MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz); + MLX5_SET(eqc, eqc, uar_page, priv->uar->index); + MLX5_SET(eqc, eqc, intr, vecidx); + MLX5_SET(eqc, eqc, log_page_size, + eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (err) + goto err_in; + + eq->vecidx = vecidx; + eq->eqn = MLX5_GET(create_eq_out, out, eq_number); + eq->irqn = pci_irq_vector(dev->pdev, vecidx); + eq->dev = dev; + eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; + + err = mlx5_debug_eq_add(dev, eq); + if (err) + goto err_eq; + + kvfree(in); + return 0; + +err_eq: + mlx5_cmd_destroy_eq(dev, eq->eqn); + +err_in: + kvfree(in); + +err_buf: + mlx5_frag_buf_free(dev, &eq->frag_buf); + return err; +} + +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev : Device which owns the eq + * @eq : EQ to enable + * @nb : Notifier call block + * + * Must be called after EQ is created in device. + * + * @return: 0 if no error + */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + int err; + + err = mlx5_irq_attach_nb(eq->irq, nb); + if (!err) + eq_update_ci(eq, 1); + + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); + +/** + * mlx5_eq_disable - Disable EQ for receiving EQEs + * @dev : Device which owns the eq + * @eq : EQ to disable + * @nb : Notifier call block + * + * Must be called before EQ is destroyed. + */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + mlx5_irq_detach_nb(eq->irq, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + mlx5_debug_eq_remove(dev, eq); + + err = mlx5_cmd_destroy_eq(dev, eq->eqn); + if (err) + mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", + eq->eqn); + + mlx5_frag_buf_free(dev, &eq->frag_buf); + return err; +} + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &eq->cq_table; + int err; + + spin_lock(&table->lock); + err = radix_tree_insert(&table->tree, cq->cqn, cq); + spin_unlock(&table->lock); + + return err; +} + +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *tmp; + + spin_lock(&table->lock); + tmp = radix_tree_delete(&table->tree, cq->cqn); + spin_unlock(&table->lock); + + if (!tmp) { + mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", + eq->eqn, cq->cqn); + return; + } + + if (tmp != cq) + mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", + eq->eqn, cq->cqn); +} + +int mlx5_eq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *eq_table; + int i; + + eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL, + dev->priv.numa_node); + if (!eq_table) + return -ENOMEM; + + dev->priv.eq_table = eq_table; + + mlx5_eq_debugfs_init(dev); + + mutex_init(&eq_table->lock); + for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + + eq_table->irq_table = mlx5_irq_table_get(dev); + cpumask_clear(&eq_table->used_cpus); + xa_init(&eq_table->comp_eqs); + xa_init(&eq_table->comp_irqs); + mutex_init(&eq_table->comp_lock); + eq_table->curr_comp_eqs = 0; + return 0; +} + +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + mlx5_eq_debugfs_cleanup(dev); + xa_destroy(&table->comp_irqs); + xa_destroy(&table->comp_eqs); + kvfree(table); +} + +/* Async EQs */ + +static int create_async_eq(struct mlx5_core_dev *dev, + struct mlx5_eq *eq, struct mlx5_eq_param *param) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + err = create_map_eq(dev, eq, param); + mutex_unlock(&eq_table->lock); + return err; +} + +static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + err = destroy_unmap_eq(dev, eq); + mutex_unlock(&eq_table->lock); + return err; +} + +static int cq_err_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eq_table *eqt; + struct mlx5_core_cq *cq; + struct mlx5_eqe *eqe; + struct mlx5_eq *eq; + u32 cqn; + + /* type == MLX5_EVENT_TYPE_CQ_ERROR */ + + eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); + eq = &eqt->async_eq.core; + eqe = data; + + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + + cq = mlx5_eq_cq_get(eq, cqn); + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); + return NOTIFY_OK; + } + + if (cq->event) + cq->event(cq, type); + + mlx5_cq_put(cq); + + return NOTIFY_OK; +} + +static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4]) +{ + __be64 *user_unaffiliated_events; + __be64 *user_affiliated_events; + int i; + + user_affiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_affiliated_events); + user_unaffiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events); + + for (i = 0; i < 4; i++) + mask[i] |= be64_to_cpu(user_affiliated_events[i] | + user_unaffiliated_events[i]); +} + +static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) +{ + u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + + if (MLX5_VPORT_MANAGER(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + + if (MLX5_CAP_GEN(dev, general_notification_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT); + + if (MLX5_CAP_GEN(dev, port_module_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT); + else + mlx5_core_dbg(dev, "port_module_event is not set\n"); + + if (MLX5_PPS_CAP(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); + + if (MLX5_CAP_GEN(dev, fpga)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) | + (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR); + if (MLX5_CAP_GEN_MAX(dev, dct)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED); + + if (MLX5_CAP_GEN(dev, temp_warn_event)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT); + + if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); + + if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + + if (mlx5_eswitch_is_funcs_handler(dev)) + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); + + if (MLX5_CAP_GEN_MAX(dev, vhca_state)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE); + + if (MLX5_CAP_MACSEC(dev, log_max_macsec_offload)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); + + if (mlx5_ipsec_device_caps(dev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); + + mask[0] = async_event_mask; + + if (MLX5_CAP_GEN(dev, event_cap)) + gather_user_async_events(dev, mask); +} + +static int +setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, + struct mlx5_eq_param *param, const char *name) +{ + int err; + + eq->irq_nb.notifier_call = mlx5_eq_async_int; + spin_lock_init(&eq->lock); + + err = create_async_eq(dev, &eq->core, param); + if (err) { + mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); + return err; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err); + destroy_async_eq(dev, &eq->core); + } + return err; +} + +static void cleanup_async_eq(struct mlx5_core_dev *dev, + struct mlx5_eq_async *eq, const char *name) +{ + int err; + + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + err = destroy_async_eq(dev, &eq->core); + if (err) + mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n", + name, err); +} + +static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_NUM_ASYNC_EQE; +} + +static int create_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; + int err; + + /* All the async_eqs are using single IRQ, request one IRQ and share its + * index among all the async_eqs of this device. + */ + table->ctrl_irq = mlx5_ctrl_irq_request(dev); + if (IS_ERR(table->ctrl_irq)) + return PTR_ERR(table->ctrl_irq); + + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); + mlx5_eq_notifier_register(dev, &table->cq_err_nb); + + param = (struct mlx5_eq_param) { + .irq = table->ctrl_irq, + .nent = MLX5_NUM_CMD_EQE, + .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, + }; + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ); + err = setup_async_eq(dev, &table->cmd_eq, ¶m, "cmd"); + if (err) + goto err1; + + mlx5_cmd_use_events(dev); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); + + param = (struct mlx5_eq_param) { + .irq = table->ctrl_irq, + .nent = async_eq_depth_devlink_param_get(dev), + }; + + gather_async_events_mask(dev, param.mask); + err = setup_async_eq(dev, &table->async_eq, ¶m, "async"); + if (err) + goto err2; + + param = (struct mlx5_eq_param) { + .irq = table->ctrl_irq, + .nent = /* TODO: sriov max_vf + */ 1, + .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, + }; + + err = setup_async_eq(dev, &table->pages_eq, ¶m, "pages"); + if (err) + goto err3; + + return 0; + +err3: + cleanup_async_eq(dev, &table->async_eq, "async"); +err2: + mlx5_cmd_use_polling(dev); + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); +err1: + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); + return err; +} + +static void destroy_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + cleanup_async_eq(dev, &table->pages_eq, "pages"); + cleanup_async_eq(dev, &table->async_eq, "async"); + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ); + mlx5_cmd_use_polling(dev); + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); +} + +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) +{ + return &dev->priv.eq_table->async_eq.core; +} + +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); +} + +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); +} + +/* Generic EQ API for mlx5_core consumers + * Needed For RDMA ODP EQ for now + */ +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, + struct mlx5_eq_param *param) +{ + struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL, + dev->priv.numa_node); + int err; + + if (!eq) + return ERR_PTR(-ENOMEM); + + param->irq = dev->priv.eq_table->ctrl_irq; + err = create_async_eq(dev, eq, param); + if (err) { + kvfree(eq); + eq = ERR_PTR(err); + } + + return eq; +} +EXPORT_SYMBOL(mlx5_eq_create_generic); + +int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (IS_ERR(eq)) + return -EINVAL; + + err = destroy_async_eq(dev, eq); + if (err) + goto out; + + kvfree(eq); +out: + return err; +} +EXPORT_SYMBOL(mlx5_eq_destroy_generic); + +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) +{ + u32 ci = eq->cons_index + cc; + u32 nent = eq_get_size(eq); + struct mlx5_eqe *eqe; + + eqe = get_eqe(eq, ci & (nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + if (eqe) + dma_rmb(); + + return eqe; +} +EXPORT_SYMBOL(mlx5_eq_get_eqe); + +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val; + + eq->cons_index += cc; + val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + wmb(); +} +EXPORT_SYMBOL(mlx5_eq_update_ci); + +static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq *irq; + + irq = xa_load(&table->comp_irqs, vecidx); + if (!irq) + return; + + xa_erase(&table->comp_irqs, vecidx); + mlx5_irq_release_vector(irq); +} + +static int mlx5_cpumask_default_spread(int numa_node, int index) +{ + const struct cpumask *prev = cpu_none_mask; + const struct cpumask *mask; + int found_cpu = 0; + int i = 0; + int cpu; + + rcu_read_lock(); + for_each_numa_hop_mask(mask, numa_node) { + for_each_cpu_andnot(cpu, mask, prev) { + if (i++ == index) { + found_cpu = cpu; + goto spread_done; + } + } + prev = mask; + } + +spread_done: + rcu_read_unlock(); + return found_cpu; +} + +static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(dev)) + return dev->priv.parent_mdev->priv.eq_table->rmap; +#endif + return dev->priv.eq_table->rmap; +#else + return NULL; +#endif +} + +static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct cpu_rmap *rmap; + struct mlx5_irq *irq; + int cpu; + + rmap = mlx5_eq_table_get_pci_rmap(dev); + cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx); + irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap); + if (IS_ERR(irq)) + return PTR_ERR(irq); + + return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); +} + +static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq *irq; + int cpu; + + irq = xa_load(&table->comp_irqs, vecidx); + if (!irq) + return; + + cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); + cpumask_clear_cpu(cpu, &table->used_cpus); + xa_erase(&table->comp_irqs, vecidx); + mlx5_irq_affinity_irq_release(dev, irq); +} + +static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct irq_affinity_desc af_desc = {}; + struct mlx5_irq *irq; + + /* In case SF irq pool does not exist, fallback to the PF irqs*/ + if (!mlx5_irq_pool_is_sf_pool(pool)) + return comp_irq_request_pci(dev, vecidx); + + af_desc.is_managed = 1; + cpumask_copy(&af_desc.mask, cpu_online_mask); + cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus); + irq = mlx5_irq_affinity_request(pool, &af_desc); + if (IS_ERR(irq)) + return PTR_ERR(irq); + + cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq)); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + + return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); +} + +static void comp_irq_release(struct mlx5_core_dev *dev, u16 vecidx) +{ + mlx5_core_is_sf(dev) ? comp_irq_release_sf(dev, vecidx) : + comp_irq_release_pci(dev, vecidx); +} + +static int comp_irq_request(struct mlx5_core_dev *dev, u16 vecidx) +{ + return mlx5_core_is_sf(dev) ? comp_irq_request_sf(dev, vecidx) : + comp_irq_request_pci(dev, vecidx); +} + +#ifdef CONFIG_RFS_ACCEL +static int alloc_rmap(struct mlx5_core_dev *mdev) +{ + struct mlx5_eq_table *eq_table = mdev->priv.eq_table; + + /* rmap is a mapping between irq number and queue number. + * Each irq can be assigned only to a single rmap. + * Since SFs share IRQs, rmap mapping cannot function correctly + * for irqs that are shared between different core/netdev RX rings. + * Hence we don't allow netdev rmap for SFs. + */ + if (mlx5_core_is_sf(mdev)) + return 0; + + eq_table->rmap = alloc_irq_cpu_rmap(eq_table->max_comp_eqs); + if (!eq_table->rmap) + return -ENOMEM; + return 0; +} + +static void free_rmap(struct mlx5_core_dev *mdev) +{ + struct mlx5_eq_table *eq_table = mdev->priv.eq_table; + + if (eq_table->rmap) { + free_irq_cpu_rmap(eq_table->rmap); + eq_table->rmap = NULL; + } +} +#else +static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; } +static void free_rmap(struct mlx5_core_dev *mdev) {} +#endif + +static void destroy_comp_eq(struct mlx5_core_dev *dev, struct mlx5_eq_comp *eq, u16 vecidx) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + xa_erase(&table->comp_eqs, vecidx); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + if (destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); + kfree(eq); + comp_irq_release(dev, vecidx); + table->curr_comp_eqs--; +} + +static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_COMP_EQ_SIZE; +} + +/* Must be called with EQ table comp_lock held */ +static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; + struct mlx5_eq_comp *eq; + struct mlx5_irq *irq; + int nent; + int err; + + lockdep_assert_held(&table->comp_lock); + if (table->curr_comp_eqs == table->max_comp_eqs) { + mlx5_core_err(dev, "maximum number of vectors is allocated, %d\n", + table->max_comp_eqs); + return -ENOMEM; + } + + err = comp_irq_request(dev, vecidx); + if (err) + return err; + + nent = comp_eq_depth_devlink_param_get(dev); + + eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); + if (!eq) { + err = -ENOMEM; + goto clean_irq; + } + + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); + + irq = xa_load(&table->comp_irqs, vecidx); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; + param = (struct mlx5_eq_param) { + .irq = irq, + .nent = nent, + }; + + err = create_map_eq(dev, &eq->core, ¶m); + if (err) + goto clean_eq; + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + destroy_unmap_eq(dev, &eq->core); + goto clean_eq; + } + + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); + err = xa_err(xa_store(&table->comp_eqs, vecidx, eq, GFP_KERNEL)); + if (err) + goto disable_eq; + + table->curr_comp_eqs++; + return eq->core.eqn; + +disable_eq: + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); +clean_eq: + kfree(eq); +clean_irq: + comp_irq_release(dev, vecidx); + return err; +} + +int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + int ret = 0; + + mutex_lock(&table->comp_lock); + eq = xa_load(&table->comp_eqs, vecidx); + if (eq) { + *eqn = eq->core.eqn; + goto out; + } + + ret = create_comp_eq(dev, vecidx); + if (ret < 0) { + mutex_unlock(&table->comp_lock); + return ret; + } + + *eqn = ret; +out: + mutex_unlock(&table->comp_lock); + return 0; +} +EXPORT_SYMBOL(mlx5_comp_eqn_get); + +int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + int eqn; + int err; + + /* Allocate the EQ if not allocated yet */ + err = mlx5_comp_eqn_get(dev, vector, &eqn); + if (err) + return err; + + eq = xa_load(&table->comp_eqs, vector); + *irqn = eq->core.irqn; + return 0; +} + +unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev) +{ + return dev->priv.eq_table->max_comp_eqs; +} +EXPORT_SYMBOL(mlx5_comp_vectors_max); + +static struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + + eq = xa_load(&table->comp_eqs, vector); + if (eq) + return mlx5_irq_get_affinity_mask(eq->core.irq); + + return NULL; +} + +int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector) +{ + struct cpumask *mask; + int cpu; + + mask = mlx5_comp_irq_get_affinity_mask(dev, vector); + if (mask) + cpu = cpumask_first(mask); + else + cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vector); + + return cpu; +} +EXPORT_SYMBOL(mlx5_comp_vector_get_cpu); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) +{ + return dev->priv.eq_table->rmap; +} +#endif + +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + unsigned long index; + + xa_for_each(&table->comp_eqs, index, eq) + if (eq->core.eqn == eqn) + return eq; + + return ERR_PTR(-ENOENT); +} + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) +{ + mlx5_irq_table_free_irqs(dev); +} + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +#define MLX5_MAX_ASYNC_EQS 4 +#else +#define MLX5_MAX_ASYNC_EQS 3 +#endif + +static int get_num_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int max_dev_eqs; + int max_eqs_sf; + int num_eqs; + + /* If ethernet is disabled we use just a single completion vector to + * have the other vectors available for other drivers using mlx5_core. For + * example, mlx5_vdpa + */ + if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev)) + return 1; + + max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + + num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table), + max_dev_eqs - MLX5_MAX_ASYNC_EQS); + if (mlx5_core_is_sf(dev)) { + max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF, + mlx5_irq_table_get_sfs_vec(eq_table->irq_table)); + num_eqs = min_t(int, num_eqs, max_eqs_sf); + } + + return num_eqs; +} + +int mlx5_eq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + eq_table->max_comp_eqs = get_num_eqs(dev); + err = create_async_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create async EQs\n"); + goto err_async_eqs; + } + + err = alloc_rmap(dev); + if (err) { + mlx5_core_err(dev, "Failed to allocate rmap\n"); + goto err_rmap; + } + + return 0; + +err_rmap: + destroy_async_eqs(dev); +err_async_eqs: + return err; +} + +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + unsigned long index; + + xa_for_each(&table->comp_eqs, index, eq) + destroy_comp_eq(dev, eq, index); + + free_rmap(dev); + destroy_async_eqs(dev); +} + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); +} +EXPORT_SYMBOL(mlx5_eq_notifier_register); + +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); +} +EXPORT_SYMBOL(mlx5_eq_notifier_unregister); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile new file mode 100644 index 0000000000..c78512eed8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c new file mode 100644 index 0000000000..6b4c9ffad9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "lgcy.h" + +static void esw_acl_egress_lgcy_rules_destroy(struct mlx5_vport *vport) +{ + esw_acl_egress_vlan_destroy(vport); + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) { + mlx5_del_flow_rules(vport->egress.legacy.drop_rule); + vport->egress.legacy.drop_rule = NULL; + } +} + +static int esw_acl_egress_lgcy_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_group *drop_grp; + u32 *flow_group_in; + int err = 0; + + err = esw_acl_egress_vlan_grp_create(esw, vport); + if (err) + return err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto alloc_err; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + drop_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(drop_grp)) { + err = PTR_ERR(drop_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", + vport->vport, err); + goto drop_grp_err; + } + + vport->egress.legacy.drop_grp = drop_grp; + kvfree(flow_group_in); + return 0; + +drop_grp_err: + kvfree(flow_group_in); +alloc_err: + esw_acl_egress_vlan_grp_destroy(vport); + return err; +} + +static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_grp)) { + mlx5_destroy_flow_group(vport->egress.legacy.drop_grp); + vport->egress.legacy.drop_grp = NULL; + } + esw_acl_egress_vlan_grp_destroy(vport); +} + +int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + bool vst_mode_steering = esw_vst_mode_is_steering(esw); + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_fc *drop_counter = NULL; + struct mlx5_flow_act flow_act = {}; + /* The egress acl table contains 2 rules: + * 1)Allow traffic with vlan_tag=vst_vlan_id + * 2)Drop all other traffic. + */ + int table_size = 2; + int dest_num = 0; + int actions_flag; + int err = 0; + + if (vport->egress.legacy.drop_counter) { + drop_counter = vport->egress.legacy.drop_counter; + } else if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + drop_counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(drop_counter)) { + esw_warn(esw->dev, + "vport[%d] configure egress drop rule counter err(%ld)\n", + vport->vport, PTR_ERR(drop_counter)); + drop_counter = NULL; + } + vport->egress.legacy.drop_counter = drop_counter; + } + + esw_acl_egress_lgcy_rules_destroy(vport); + + if (!vport->info.vlan && !vport->info.qos) { + esw_acl_egress_lgcy_cleanup(esw, vport); + return 0; + } + + if (!vport->egress.acl) { + vport->egress.acl = esw_acl_table_create(esw, vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + table_size); + if (IS_ERR(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + goto out; + } + + err = esw_acl_egress_lgcy_groups_create(esw, vport); + if (err) + goto out; + } + + esw_debug(esw->dev, + "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->info.vlan, vport->info.qos); + + /* Allowed vlan rule */ + actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_mode_steering) + actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan, + actions_flag); + if (err) + goto out; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + + /* Attach egress drop flow counter */ + if (drop_counter) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter); + dst = &drop_ctr_dst; + dest_num++; + } + vport->egress.legacy.drop_rule = + mlx5_add_flow_rules(vport->egress.acl, NULL, + &flow_act, dst, dest_num); + if (IS_ERR(vport->egress.legacy.drop_rule)) { + err = PTR_ERR(vport->egress.legacy.drop_rule); + esw_warn(esw->dev, + "vport[%d] configure egress drop rule failed, err(%d)\n", + vport->vport, err); + vport->egress.legacy.drop_rule = NULL; + goto out; + } + + return err; + +out: + esw_acl_egress_lgcy_cleanup(esw, vport); + return err; +} + +void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + goto clean_drop_counter; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport); + + esw_acl_egress_lgcy_rules_destroy(vport); + esw_acl_egress_lgcy_groups_destroy(vport); + esw_acl_egress_table_destroy(vport); + +clean_drop_counter: + if (vport->egress.legacy.drop_counter) { + mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); + vport->egress.legacy.drop_counter = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c new file mode 100644 index 0000000000..24b1ca4e4f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "ofld.h" + +static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) +{ + if (!vport->egress.offloads.fwd_rule) + return; + + mlx5_del_flow_rules(vport->egress.offloads.fwd_rule); + vport->egress.offloads.fwd_rule = NULL; +} + +void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index) +{ + struct mlx5_flow_handle *bounce_rule = + xa_load(&vport->egress.offloads.bounce_rules, rule_index); + + if (!bounce_rule) + return; + + mlx5_del_flow_rules(bounce_rule); + xa_erase(&vport->egress.offloads.bounce_rules, rule_index); +} + +static void esw_acl_egress_ofld_bounce_rules_destroy(struct mlx5_vport *vport) +{ + struct mlx5_flow_handle *bounce_rule; + unsigned long i; + + xa_for_each(&vport->egress.offloads.bounce_rules, i, bounce_rule) { + mlx5_del_flow_rules(bounce_rule); + xa_erase(&vport->egress.offloads.bounce_rules, i); + } +} + +static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest) +{ + struct mlx5_flow_act flow_act = {}; + int err = 0; + + esw_debug(esw->dev, "vport(%d) configure egress acl rule fwd2vport(%d)\n", + vport->vport, fwd_dest->vport.num); + + /* Delete the old egress forward-to-vport rule if any */ + esw_acl_egress_ofld_fwd2vport_destroy(vport); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + vport->egress.offloads.fwd_rule = + mlx5_add_flow_rules(vport->egress.acl, NULL, + &flow_act, fwd_dest, 1); + if (IS_ERR(vport->egress.offloads.fwd_rule)) { + err = PTR_ERR(vport->egress.offloads.fwd_rule); + esw_warn(esw->dev, + "vport(%d) failed to add fwd2vport acl rule err(%d)\n", + vport->vport, err); + vport->egress.offloads.fwd_rule = NULL; + } + + return err; +} + +static int esw_acl_egress_ofld_rules_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest) +{ + int err = 0; + int action; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { + /* For prio tag mode, there is only 1 FTEs: + * 1) prio tag packets - pop the prio tag VLAN, allow + * Unmatched traffic is allowed by default + */ + esw_debug(esw->dev, + "vport[%d] configure prio tag egress rules\n", vport->vport); + + action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + action |= fwd_dest ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + /* prio tag vlan rule - pop it so vport receives untagged packets */ + err = esw_egress_acl_vlan_create(esw, vport, fwd_dest, 0, action); + if (err) + goto prio_err; + } + + if (fwd_dest) { + err = esw_acl_egress_ofld_fwd2vport_create(esw, vport, fwd_dest); + if (err) + goto fwd_err; + } + + return 0; + +fwd_err: + esw_acl_egress_vlan_destroy(vport); +prio_err: + return err; +} + +static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport) +{ + esw_acl_egress_vlan_destroy(vport); + esw_acl_egress_ofld_fwd2vport_destroy(vport); + esw_acl_egress_ofld_bounce_rules_destroy(vport); +} + +static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fwd_grp; + u32 *flow_group_in; + u32 flow_index = 0; + int ret = 0; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { + ret = esw_acl_egress_vlan_grp_create(esw, vport); + if (ret) + return ret; + + flow_index++; + } + + if (!mlx5_esw_acl_egress_fwd2vport_supported(esw)) + goto out; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + ret = -ENOMEM; + goto fwd_grp_err; + } + + /* This group holds 1 FTE to forward all packets to other vport + * when bond vports is supported. + */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + fwd_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(fwd_grp)) { + ret = PTR_ERR(fwd_grp); + esw_warn(esw->dev, + "Failed to create vport[%d] egress fwd2vport flow group, err(%d)\n", + vport->vport, ret); + kvfree(flow_group_in); + goto fwd_grp_err; + } + vport->egress.offloads.fwd_grp = fwd_grp; + kvfree(flow_group_in); + return 0; + +fwd_grp_err: + esw_acl_egress_vlan_grp_destroy(vport); +out: + return ret; +} + +static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.offloads.fwd_grp)) { + mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp); + vport->egress.offloads.fwd_grp = NULL; + } + + if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) { + mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + vport->egress.offloads.bounce_grp = NULL; + } + + esw_acl_egress_vlan_grp_destroy(vport); +} + +static bool esw_acl_egress_needed(struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num); +} + +int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int table_size = 0; + int err; + + if (!mlx5_esw_acl_egress_fwd2vport_supported(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + if (!esw_acl_egress_needed(esw, vport->vport)) + return 0; + + esw_acl_egress_ofld_rules_destroy(vport); + + if (mlx5_esw_acl_egress_fwd2vport_supported(esw)) + table_size++; + if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) + table_size++; + vport->egress.acl = esw_acl_table_create(esw, vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size); + if (IS_ERR(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + return err; + } + vport->egress.type = VPORT_EGRESS_ACL_TYPE_DEFAULT; + + err = esw_acl_egress_ofld_groups_create(esw, vport); + if (err) + goto group_err; + + esw_debug(esw->dev, "vport[%d] configure egress rules\n", vport->vport); + + err = esw_acl_egress_ofld_rules_create(esw, vport, NULL); + if (err) + goto rules_err; + + return 0; + +rules_err: + esw_acl_egress_ofld_groups_destroy(vport); +group_err: + esw_acl_egress_table_destroy(vport); + return err; +} + +void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport) +{ + esw_acl_egress_ofld_rules_destroy(vport); + esw_acl_egress_ofld_groups_destroy(vport); + esw_acl_egress_table_destroy(vport); +} + +int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num, + u16 passive_vport_num) +{ + struct mlx5_vport *passive_vport = mlx5_eswitch_get_vport(esw, passive_vport_num); + struct mlx5_vport *active_vport = mlx5_eswitch_get_vport(esw, active_vport_num); + struct mlx5_flow_destination fwd_dest = {}; + + if (IS_ERR(active_vport)) + return PTR_ERR(active_vport); + if (IS_ERR(passive_vport)) + return PTR_ERR(passive_vport); + + /* Cleanup and recreate rules WITHOUT fwd2vport of active vport */ + esw_acl_egress_ofld_rules_destroy(active_vport); + esw_acl_egress_ofld_rules_create(esw, active_vport, NULL); + + /* Cleanup and recreate all rules + fwd2vport rule of passive vport to forward */ + esw_acl_egress_ofld_rules_destroy(passive_vport); + fwd_dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + fwd_dest.vport.num = active_vport_num; + fwd_dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + fwd_dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + + return esw_acl_egress_ofld_rules_create(esw, passive_vport, &fwd_dest); +} + +int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (IS_ERR(vport)) + return PTR_ERR(vport); + + esw_acl_egress_ofld_rules_destroy(vport); + return esw_acl_egress_ofld_rules_create(esw, vport, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c new file mode 100644 index 0000000000..d599e50af3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" + +struct mlx5_flow_table * +esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + int acl_supported; + u16 vport_num; + int err; + + acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ? + MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) : + MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support); + + if (!acl_supported) + return ERR_PTR(-EOPNOTSUPP); + + vport_num = vport->vport; + esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num, + ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress"); + + root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n", + vport_num); + return ERR_PTR(-EOPNOTSUPP); + } + + ft_attr.max_fte = size; + if (vport_num || mlx5_core_is_ecpf(esw->dev)) + ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; + acl = mlx5_create_vport_flow_table(root_ns, &ft_attr, vport_num); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "vport[%d] create %s ACL table, err(%d)\n", vport_num, + ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress", err); + } + return acl; +} + +int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest, + u16 vlan_id, u32 flow_action) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + + if (vport->egress.allowed_vlan) + return -EEXIST; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = flow_action; + vport->egress.allowed_vlan = + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, fwd_dest, 0); + if (IS_ERR(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + esw_warn(esw->dev, + "vport[%d] configure egress vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + } + + kvfree(spec); + return err; +} + +void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { + mlx5_del_flow_rules(vport->egress.allowed_vlan); + vport->egress.allowed_vlan = NULL; + } +} + +int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *vlan_grp; + void *match_criteria; + u32 *flow_group_in; + int ret = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + vlan_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in); + if (IS_ERR(vlan_grp)) { + ret = PTR_ERR(vlan_grp); + esw_warn(esw->dev, + "Failed to create E-Switch vport[%d] egress pop vlans flow group, err(%d)\n", + vport->vport, ret); + goto out; + } + vport->egress.vlan_grp = vlan_grp; + +out: + kvfree(flow_group_in); + return ret; +} + +void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.vlan_grp)) { + mlx5_destroy_flow_group(vport->egress.vlan_grp); + vport->egress.vlan_grp = NULL; + } +} + +void esw_acl_egress_table_destroy(struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + return; + + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.acl = NULL; +} + +void esw_acl_ingress_table_destroy(struct mlx5_vport *vport) +{ + if (!vport->ingress.acl) + return; + + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; +} + +void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport) +{ + if (!vport->ingress.allow_rule) + return; + + mlx5_del_flow_rules(vport->ingress.allow_rule); + vport->ingress.allow_rule = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h new file mode 100644 index 0000000000..a47063fab5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#ifndef __MLX5_ESWITCH_ACL_HELPER_H__ +#define __MLX5_ESWITCH_ACL_HELPER_H__ + +#include "eswitch.h" + +/* General acl helper functions */ +struct mlx5_flow_table * +esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size); + +/* Egress acl helper functions */ +void esw_acl_egress_table_destroy(struct mlx5_vport *vport); +int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_flow_destination *fwd_dest, + u16 vlan_id, u32 flow_action); +void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport); +int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport); + +/* Ingress acl helper functions */ +void esw_acl_ingress_table_destroy(struct mlx5_vport *vport); +void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport); + +#endif /* __MLX5_ESWITCH_ACL_HELPER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c new file mode 100644 index 0000000000..093ed86a0a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "lgcy.h" + +static void esw_acl_ingress_lgcy_rules_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.legacy.drop_rule) { + mlx5_del_flow_rules(vport->ingress.legacy.drop_rule); + vport->ingress.legacy.drop_rule = NULL; + } + esw_acl_ingress_allow_rule_destroy(vport); +} + +static int esw_acl_ingress_lgcy_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + int err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n", + vport->vport, err); + goto spoof_err; + } + vport->ingress.legacy.allow_untagged_spoofchk_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n", + vport->vport, err); + goto untagged_err; + } + vport->ingress.legacy.allow_untagged_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n", + vport->vport, err); + goto allow_spoof_err; + } + vport->ingress.legacy.allow_spoofchk_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, err); + goto drop_err; + } + vport->ingress.legacy.drop_grp = g; + kvfree(flow_group_in); + return 0; + +drop_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; + } +allow_spoof_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } +untagged_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } +spoof_err: + kvfree(flow_group_in); + return err; +} + +static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.legacy.allow_spoofchk_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_spoofchk_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } + if (vport->ingress.legacy.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp); + vport->ingress.legacy.drop_grp = NULL; + } +} + +int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + bool vst_mode_steering = esw_vst_mode_is_steering(esw); + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec = NULL; + struct mlx5_fc *counter = NULL; + bool vst_check_cvlan = false; + bool vst_push_cvlan = false; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. + * 1 drop rule from the last group): + * 1)Allow untagged traffic with smac=original mac. + * 2)Allow untagged traffic. + * 3)Allow traffic with smac=original mac. + * 4)Drop all other traffic. + */ + int table_size = 4; + int dest_num = 0; + int err = 0; + u8 *smac_v; + + esw_acl_ingress_lgcy_rules_destroy(vport); + + if (vport->ingress.legacy.drop_counter) { + counter = vport->ingress.legacy.drop_counter; + } else if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(counter)) { + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule counter failed\n", + vport->vport); + counter = NULL; + } + vport->ingress.legacy.drop_counter = counter; + } + + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { + esw_acl_ingress_lgcy_cleanup(esw, vport); + return 0; + } + + if (!vport->ingress.acl) { + vport->ingress.acl = esw_acl_table_create(esw, vport, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + table_size); + if (IS_ERR(vport->ingress.acl)) { + err = PTR_ERR(vport->ingress.acl); + vport->ingress.acl = NULL; + return err; + } + + err = esw_acl_ingress_lgcy_groups_create(esw, vport); + if (err) + goto out; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->info.vlan, vport->info.qos); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + if ((vport->info.vlan || vport->info.qos)) { + if (vst_mode_steering) + vst_push_cvlan = true; + else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always)) + vst_check_cvlan = true; + } + + if (vst_check_cvlan || vport->info.spoofchk) + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* Create ingress allow rule */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_push_cvlan) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + flow_act.vlan[0].prio = vport->info.qos; + flow_act.vlan[0].vid = vport->info.vlan; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + } + + if (vst_check_cvlan) + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + + if (vport->info.spoofchk) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.smac_15_0); + smac_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, vport->info.mac); + } + + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + goto out; + } + + if (!vst_check_cvlan && !vport->info.spoofchk) + goto out; + + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + /* Attach drop flow counter */ + if (counter) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); + dst = &drop_ctr_dst; + dest_num++; + } + vport->ingress.legacy.drop_rule = + mlx5_add_flow_rules(vport->ingress.acl, NULL, + &flow_act, dst, dest_num); + if (IS_ERR(vport->ingress.legacy.drop_rule)) { + err = PTR_ERR(vport->ingress.legacy.drop_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule, err(%d)\n", + vport->vport, err); + vport->ingress.legacy.drop_rule = NULL; + goto out; + } + kvfree(spec); + return 0; + +out: + if (err) + esw_acl_ingress_lgcy_cleanup(esw, vport); + kvfree(spec); + return err; +} + +void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->ingress.acl)) + goto clean_drop_counter; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); + + esw_acl_ingress_lgcy_rules_destroy(vport); + esw_acl_ingress_lgcy_groups_destroy(vport); + esw_acl_ingress_table_destroy(vport); + +clean_drop_counter: + if (vport->ingress.legacy.drop_counter) { + mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); + vport->ingress.legacy.drop_counter = NULL; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c new file mode 100644 index 0000000000..50d2ea3239 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#include "mlx5_core.h" +#include "eswitch.h" +#include "helper.h" +#include "ofld.h" + +static bool +esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport) +{ + return (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)); +} + +static int esw_acl_ingress_prio_tag_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + + /* For prio tag mode, there is only 1 FTEs: + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow + * Unmatched traffic is allowed by default + */ + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Untagged packets - push prio tag VLAN, allow */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + flow_act.vlan[0].vid = 0; + flow_act.vlan[0].prio = 0; + + if (vport->ingress.offloads.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + } + + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + esw_warn(esw->dev, + "vport[%d] configure ingress untagged allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + } + + kvfree(spec); + return err; +} + +static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + int err = 0; + u32 key; + + key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport); + key >>= ESW_SOURCE_PORT_METADATA_OFFSET; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, key); + MLX5_SET(set_action_in, action, offset, + ESW_SOURCE_PORT_METADATA_OFFSET); + MLX5_SET(set_action_in, action, length, + ESW_SOURCE_PORT_METADATA_BITS); + + vport->ingress.offloads.modify_metadata = + mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action); + if (IS_ERR(vport->ingress.offloads.modify_metadata)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata); + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + flow_act.fg = vport->ingress.offloads.metadata_allmatch_grp; + vport->ingress.offloads.modify_metadata_rule = + mlx5_add_flow_rules(vport->ingress.acl, + NULL, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); + vport->ingress.offloads.modify_metadata_rule = NULL; + } + return err; +} + +static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->ingress.offloads.modify_metadata_rule) + return; + + mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); + vport->ingress.offloads.modify_metadata_rule = NULL; +} + +static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + int err = 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_act.fg = vport->ingress.offloads.drop_grp; + flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } + + vport->ingress.offloads.drop_rule = flow_rule; +out: + return err; +} + +static void esw_acl_ingress_src_port_drop_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->ingress.offloads.drop_rule) + return; + + mlx5_del_flow_rules(vport->ingress.offloads.drop_rule); + vport->ingress.offloads.drop_rule = NULL; +} + +static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int err; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_acl_ingress_mod_metadata_create(esw, vport); + if (err) { + esw_warn(esw->dev, + "vport(%d) create ingress modify metadata, err(%d)\n", + vport->vport, err); + return err; + } + } + + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { + err = esw_acl_ingress_prio_tag_create(esw, vport); + if (err) { + esw_warn(esw->dev, + "vport(%d) create ingress prio tag rule, err(%d)\n", + vport->vport, err); + goto prio_tag_err; + } + } + + return 0; + +prio_tag_err: + esw_acl_ingress_mod_metadata_destroy(esw, vport); + return err; +} + +static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_acl_ingress_allow_rule_destroy(vport); + esw_acl_ingress_mod_metadata_destroy(esw, vport); + esw_acl_ingress_src_port_drop_destroy(esw, vport); +} + +static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + u32 flow_index = 0; + int ret = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + if (vport->vport == MLX5_VPORT_UPLINK) { + /* This group can hold an FTE to drop all traffic. + * Need in case LAG is enabled. + */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto drop_err; + } + vport->ingress.offloads.drop_grp = g; + flow_index++; + } + + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) { + /* This group is to hold FTE to match untagged packets when prio_tag + * is enabled. + */ + memset(flow_group_in, 0, inlen); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, match_criteria); + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n", + vport->vport, ret); + goto prio_tag_err; + } + vport->ingress.offloads.metadata_prio_tag_grp = g; + flow_index++; + } + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + /* This group holds an FTE with no match to add metadata for + * tagged packets if prio-tag is enabled, or for all untagged + * traffic in case prio-tag is disabled. + */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n", + vport->vport, ret); + goto metadata_err; + } + vport->ingress.offloads.metadata_allmatch_grp = g; + } + + kvfree(flow_group_in); + return 0; + +metadata_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); + vport->ingress.offloads.metadata_prio_tag_grp = NULL; + } +prio_tag_err: + if (!IS_ERR_OR_NULL(vport->ingress.offloads.drop_grp)) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } +drop_err: + kvfree(flow_group_in); + return ret; +} + +static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport) +{ + if (vport->ingress.offloads.metadata_allmatch_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp); + vport->ingress.offloads.metadata_allmatch_grp = NULL; + } + + if (vport->ingress.offloads.metadata_prio_tag_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp); + vport->ingress.offloads.metadata_prio_tag_grp = NULL; + } + + if (vport->ingress.offloads.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.drop_grp); + vport->ingress.offloads.drop_grp = NULL; + } +} + +int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int num_ftes = 0; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !esw_acl_ingress_prio_tag_enabled(esw, vport)) + return 0; + + esw_acl_ingress_allow_rule_destroy(vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + num_ftes++; + if (vport->vport == MLX5_VPORT_UPLINK) + num_ftes++; + if (esw_acl_ingress_prio_tag_enabled(esw, vport)) + num_ftes++; + + vport->ingress.acl = esw_acl_table_create(esw, vport, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + num_ftes); + if (IS_ERR(vport->ingress.acl)) { + err = PTR_ERR(vport->ingress.acl); + vport->ingress.acl = NULL; + return err; + } + + err = esw_acl_ingress_ofld_groups_create(esw, vport); + if (err) + goto group_err; + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + err = esw_acl_ingress_ofld_rules_create(esw, vport); + if (err) + goto rules_err; + + return 0; + +rules_err: + esw_acl_ingress_ofld_groups_destroy(vport); +group_err: + esw_acl_ingress_table_destroy(vport); + return err; +} + +void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_acl_ingress_ofld_rules_destroy(esw, vport); + esw_acl_ingress_ofld_groups_destroy(vport); + esw_acl_ingress_table_destroy(vport); +} + +/* Caller must hold rtnl_lock */ +int mlx5_esw_acl_ingress_vport_metadata_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + int err; + + if (WARN_ON_ONCE(IS_ERR(vport))) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return PTR_ERR(vport); + } + + esw_acl_ingress_ofld_rules_destroy(esw, vport); + + vport->metadata = metadata ? metadata : vport->default_metadata; + + /* Recreate ingress acl rules with vport->metadata */ + err = esw_acl_ingress_ofld_rules_create(esw, vport); + if (err) + goto out; + + return 0; + +out: + vport->metadata = vport->default_metadata; + return err; +} + +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (IS_ERR(vport)) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return PTR_ERR(vport); + } + + return esw_acl_ingress_src_port_drop_create(esw, vport); +} + +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) { + esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); + return; + } + + esw_acl_ingress_src_port_drop_destroy(esw, vport); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h new file mode 100644 index 0000000000..44c152da3d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#ifndef __MLX5_ESWITCH_ACL_LGCY_H__ +#define __MLX5_ESWITCH_ACL_LGCY_H__ + +#include "eswitch.h" + +/* Eswitch acl egress external APIs */ +int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +/* Eswitch acl ingress external APIs */ +int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +#endif /* __MLX5_ESWITCH_ACL_LGCY_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h new file mode 100644 index 0000000000..536b04e836 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ + +#ifndef __MLX5_ESWITCH_ACL_OFLD_H__ +#define __MLX5_ESWITCH_ACL_OFLD_H__ + +#include "eswitch.h" + +#ifdef CONFIG_MLX5_ESWITCH +/* Eswitch acl egress external APIs */ +int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport); +void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index); +int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num, + u16 passive_vport_num); +int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num); + +static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch *esw) +{ + return esw && esw->mode == MLX5_ESWITCH_OFFLOADS && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE(esw->dev, egress_acl_forward_to_vport); +} + +/* Eswitch acl ingress external APIs */ +int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_acl_ingress_vport_metadata_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata); +void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num); + +#else /* CONFIG_MLX5_ESWITCH */ +static void +mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, + u16 vport_num) +{} + +static int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, + u16 vport_num) +{ + return 0; +} +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c new file mode 100644 index 0000000000..1b9bc32efd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -0,0 +1,1952 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include +#include +#include +#include +#include +#include "lib/devcom.h" +#include "bridge.h" +#include "eswitch.h" +#include "bridge_priv.h" +#define CREATE_TRACE_POINTS +#include "diag/bridge_tracepoint.h" + +static const struct rhashtable_params fdb_ht_params = { + .key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key), + .key_len = sizeof(struct mlx5_esw_bridge_fdb_key), + .head_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, ht_node), + .automatic_shrinking = true, +}; + +static void +mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid, + unsigned long val) +{ + struct switchdev_notifier_fdb_info send_info = {}; + + send_info.addr = addr; + send_info.vid = vid; + send_info.offloaded = true; + call_switchdev_notifiers(val, dev, &send_info.info, NULL); +} + +static void +mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry) +{ + if (!(entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | MLX5_ESW_BRIDGE_FLAG_PEER))) + mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, + entry->key.vid, + SWITCHDEV_FDB_DEL_TO_BRIDGE); +} + +static bool mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(struct mlx5_eswitch *esw) +{ + return BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_remove)) && + MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_size) >= sizeof(struct vlan_hdr) && + MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_offset) >= + offsetof(struct vlan_ethhdr, h_vlan_proto); +} + +static struct mlx5_pkt_reformat * +mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw) +{ + struct mlx5_pkt_reformat_params reformat_params = {}; + + reformat_params.type = MLX5_REFORMAT_TYPE_REMOVE_HDR; + reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START; + reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto); + reformat_params.size = sizeof(struct vlan_hdr); + return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); +} + +struct mlx5_flow_table * +mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + esw_warn(dev, "Failed to get FDB namespace\n"); + return ERR_PTR(-ENOENT); + } + + ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft_attr.max_fte = max_fte; + ft_attr.level = level; + ft_attr.prio = FDB_BR_OFFLOAD; + fdb = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) + esw_warn(dev, "Failed to create bridge FDB Table (err=%ld)\n", PTR_ERR(fdb)); + + return fdb; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto, + struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); + + fg = mlx5_create_flow_group(ingress_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create VLAN(proto=%x) flow group for bridge ingress table (err=%ld)\n", + vlan_proto, PTR_ERR(fg)); + + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_qinq_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, + ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(unsigned int from, unsigned int to, + u16 vlan_proto, struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); + + fg = mlx5_create_flow_group(ingress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create bridge ingress table VLAN filter flow group (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_filter_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021Q, esw, + ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_qinq_filter_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO; + + return mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(from, to, ETH_P_8021AD, esw, + ingress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO); + + fg = mlx5_create_flow_group(ingress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create MAC flow group for bridge ingress table (err=%ld)\n", + PTR_ERR(fg)); + + kvfree(in); + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto, + struct mlx5_eswitch *esw, + struct mlx5_flow_table *egress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0); + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); + + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); + + fg = mlx5_create_flow_group(egress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create VLAN flow group for bridge egress table (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO; + + return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, egress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_qinq_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *egress_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO; + + return mlx5_esw_bridge_egress_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, egress_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO); + + fg = mlx5_create_flow_group(egress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create bridge egress table MAC flow group (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_miss_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO); + + fg = mlx5_create_flow_group(egress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create bridge egress table miss flow group (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + +static int +mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_flow_group *mac_fg, *qinq_filter_fg, *qinq_fg, *vlan_filter_fg, *vlan_fg; + struct mlx5_flow_table *ingress_ft, *skip_ft; + struct mlx5_eswitch *esw = br_offloads->esw; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return -EOPNOTSUPP; + + ingress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE, + MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE, + esw); + if (IS_ERR(ingress_ft)) + return PTR_ERR(ingress_ft); + + skip_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE, + MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE, + esw); + if (IS_ERR(skip_ft)) { + err = PTR_ERR(skip_ft); + goto err_skip_tbl; + } + + vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(esw, ingress_ft); + if (IS_ERR(vlan_fg)) { + err = PTR_ERR(vlan_fg); + goto err_vlan_fg; + } + + vlan_filter_fg = mlx5_esw_bridge_ingress_vlan_filter_fg_create(esw, ingress_ft); + if (IS_ERR(vlan_filter_fg)) { + err = PTR_ERR(vlan_filter_fg); + goto err_vlan_filter_fg; + } + + qinq_fg = mlx5_esw_bridge_ingress_qinq_fg_create(esw, ingress_ft); + if (IS_ERR(qinq_fg)) { + err = PTR_ERR(qinq_fg); + goto err_qinq_fg; + } + + qinq_filter_fg = mlx5_esw_bridge_ingress_qinq_filter_fg_create(esw, ingress_ft); + if (IS_ERR(qinq_filter_fg)) { + err = PTR_ERR(qinq_filter_fg); + goto err_qinq_filter_fg; + } + + mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(esw, ingress_ft); + if (IS_ERR(mac_fg)) { + err = PTR_ERR(mac_fg); + goto err_mac_fg; + } + + br_offloads->ingress_ft = ingress_ft; + br_offloads->skip_ft = skip_ft; + br_offloads->ingress_vlan_fg = vlan_fg; + br_offloads->ingress_vlan_filter_fg = vlan_filter_fg; + br_offloads->ingress_qinq_fg = qinq_fg; + br_offloads->ingress_qinq_filter_fg = qinq_filter_fg; + br_offloads->ingress_mac_fg = mac_fg; + return 0; + +err_mac_fg: + mlx5_destroy_flow_group(qinq_filter_fg); +err_qinq_filter_fg: + mlx5_destroy_flow_group(qinq_fg); +err_qinq_fg: + mlx5_destroy_flow_group(vlan_filter_fg); +err_vlan_filter_fg: + mlx5_destroy_flow_group(vlan_fg); +err_vlan_fg: + mlx5_destroy_flow_table(skip_ft); +err_skip_tbl: + mlx5_destroy_flow_table(ingress_ft); + return err; +} + +static void +mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + mlx5_destroy_flow_group(br_offloads->ingress_mac_fg); + br_offloads->ingress_mac_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_qinq_filter_fg); + br_offloads->ingress_qinq_filter_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_qinq_fg); + br_offloads->ingress_qinq_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_vlan_filter_fg); + br_offloads->ingress_vlan_filter_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg); + br_offloads->ingress_vlan_fg = NULL; + mlx5_destroy_flow_table(br_offloads->skip_ft); + br_offloads->skip_ft = NULL; + mlx5_destroy_flow_table(br_offloads->ingress_ft); + br_offloads->ingress_ft = NULL; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft, + struct mlx5_flow_table *skip_ft, + struct mlx5_pkt_reformat *pkt_reformat); + +static int +mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_flow_group *miss_fg = NULL, *mac_fg, *vlan_fg, *qinq_fg; + struct mlx5_pkt_reformat *miss_pkt_reformat = NULL; + struct mlx5_flow_handle *miss_handle = NULL; + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_flow_table *egress_ft; + int err; + + egress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE, + MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE, + esw); + if (IS_ERR(egress_ft)) + return PTR_ERR(egress_ft); + + vlan_fg = mlx5_esw_bridge_egress_vlan_fg_create(esw, egress_ft); + if (IS_ERR(vlan_fg)) { + err = PTR_ERR(vlan_fg); + goto err_vlan_fg; + } + + qinq_fg = mlx5_esw_bridge_egress_qinq_fg_create(esw, egress_ft); + if (IS_ERR(qinq_fg)) { + err = PTR_ERR(qinq_fg); + goto err_qinq_fg; + } + + mac_fg = mlx5_esw_bridge_egress_mac_fg_create(esw, egress_ft); + if (IS_ERR(mac_fg)) { + err = PTR_ERR(mac_fg); + goto err_mac_fg; + } + + if (mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) { + miss_fg = mlx5_esw_bridge_egress_miss_fg_create(esw, egress_ft); + if (IS_ERR(miss_fg)) { + esw_warn(esw->dev, "Failed to create miss flow group (err=%ld)\n", + PTR_ERR(miss_fg)); + miss_fg = NULL; + goto skip_miss_flow; + } + + miss_pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw); + if (IS_ERR(miss_pkt_reformat)) { + esw_warn(esw->dev, + "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", + PTR_ERR(miss_pkt_reformat)); + miss_pkt_reformat = NULL; + mlx5_destroy_flow_group(miss_fg); + miss_fg = NULL; + goto skip_miss_flow; + } + + miss_handle = mlx5_esw_bridge_egress_miss_flow_create(egress_ft, + br_offloads->skip_ft, + miss_pkt_reformat); + if (IS_ERR(miss_handle)) { + esw_warn(esw->dev, "Failed to create miss flow (err=%ld)\n", + PTR_ERR(miss_handle)); + miss_handle = NULL; + mlx5_packet_reformat_dealloc(esw->dev, miss_pkt_reformat); + miss_pkt_reformat = NULL; + mlx5_destroy_flow_group(miss_fg); + miss_fg = NULL; + goto skip_miss_flow; + } + } +skip_miss_flow: + + bridge->egress_ft = egress_ft; + bridge->egress_vlan_fg = vlan_fg; + bridge->egress_qinq_fg = qinq_fg; + bridge->egress_mac_fg = mac_fg; + bridge->egress_miss_fg = miss_fg; + bridge->egress_miss_pkt_reformat = miss_pkt_reformat; + bridge->egress_miss_handle = miss_handle; + return 0; + +err_mac_fg: + mlx5_destroy_flow_group(qinq_fg); +err_qinq_fg: + mlx5_destroy_flow_group(vlan_fg); +err_vlan_fg: + mlx5_destroy_flow_table(egress_ft); + return err; +} + +static void +mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) +{ + if (bridge->egress_miss_handle) + mlx5_del_flow_rules(bridge->egress_miss_handle); + if (bridge->egress_miss_pkt_reformat) + mlx5_packet_reformat_dealloc(bridge->br_offloads->esw->dev, + bridge->egress_miss_pkt_reformat); + if (bridge->egress_miss_fg) + mlx5_destroy_flow_group(bridge->egress_miss_fg); + mlx5_destroy_flow_group(bridge->egress_mac_fg); + mlx5_destroy_flow_group(bridge->egress_qinq_fg); + mlx5_destroy_flow_group(bridge->egress_vlan_fg); + mlx5_destroy_flow_table(bridge->egress_ft); +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge, + struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_destination dests[2] = {}; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *smac_v, *smac_c; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2; + + smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, addr); + smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.smac_47_16); + eth_broadcast_addr(smac_c); + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + + if (vlan && vlan->pkt_reformat_push) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.pkt_reformat = vlan->pkt_reformat_push; + flow_act.modify_hdr = vlan->pkt_mod_hdr_push_mark; + } else if (vlan) { + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, + vlan->vid); + } + + dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dests[0].ft = bridge->egress_ft; + dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dests[1].counter_id = counter_id; + + handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, dests, + ARRAY_SIZE(dests)); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) +{ + return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + bridge, bridge->br_offloads->esw); +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_devcom_comp_dev *devcom = bridge->br_offloads->esw->devcom, *pos; + struct mlx5_eswitch *tmp, *peer_esw = NULL; + static struct mlx5_flow_handle *handle; + + if (!mlx5_devcom_for_each_peer_begin(devcom)) + return ERR_PTR(-ENODEV); + + mlx5_devcom_for_each_peer_entry(devcom, tmp, pos) { + if (mlx5_esw_is_owner(tmp, vport_num, esw_owner_vhca_id)) { + peer_esw = tmp; + break; + } + } + + if (!peer_esw) { + handle = ERR_PTR(-ENODEV); + goto out; + } + + handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + bridge, peer_esw); + +out: + mlx5_devcom_for_each_peer_end(devcom); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = br_offloads->skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *smac_v, *smac_c; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2; + + smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, addr); + smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.smac_47_16); + eth_broadcast_addr(smac_c); + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num)); + + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + + handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_VPORT, + .vport.num = vport_num, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *dmac_v, *dmac_c; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && + vport_num == MLX5_VPORT_UPLINK) + rule_spec->flow_context.flow_source = + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.dmac_47_16); + ether_addr_copy(dmac_v, addr); + dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.dmac_47_16); + eth_broadcast_addr(dmac_c); + + if (vlan) { + if (vlan->pkt_reformat_pop) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.pkt_reformat = vlan->pkt_reformat_pop; + } + + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, + vlan->vid); + } + + if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + dest.vport.vhca_id = esw_owner_vhca_id; + } + handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft, + struct mlx5_flow_table *skip_ft, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, + .flags = FLOW_ACT_NO_APPEND, + .pkt_reformat = pkt_reformat, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_1, + ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK); + + handle = mlx5_add_flow_rules(egress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_esw_bridge *mlx5_esw_bridge_create(struct net_device *br_netdev, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *bridge; + int err; + + bridge = kvzalloc(sizeof(*bridge), GFP_KERNEL); + if (!bridge) + return ERR_PTR(-ENOMEM); + + bridge->br_offloads = br_offloads; + err = mlx5_esw_bridge_egress_table_init(br_offloads, bridge); + if (err) + goto err_egress_tbl; + + err = rhashtable_init(&bridge->fdb_ht, &fdb_ht_params); + if (err) + goto err_fdb_ht; + + err = mlx5_esw_bridge_mdb_init(bridge); + if (err) + goto err_mdb_ht; + + INIT_LIST_HEAD(&bridge->fdb_list); + bridge->ifindex = br_netdev->ifindex; + bridge->refcnt = 1; + bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); + bridge->vlan_proto = ETH_P_8021Q; + list_add(&bridge->list, &br_offloads->bridges); + mlx5_esw_bridge_debugfs_init(br_netdev, bridge); + + return bridge; + +err_mdb_ht: + rhashtable_destroy(&bridge->fdb_ht); +err_fdb_ht: + mlx5_esw_bridge_egress_table_cleanup(bridge); +err_egress_tbl: + kvfree(bridge); + return ERR_PTR(err); +} + +static void mlx5_esw_bridge_get(struct mlx5_esw_bridge *bridge) +{ + bridge->refcnt++; +} + +static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge *bridge) +{ + if (--bridge->refcnt) + return; + + mlx5_esw_bridge_debugfs_cleanup(bridge); + mlx5_esw_bridge_egress_table_cleanup(bridge); + mlx5_esw_bridge_mcast_disable(bridge); + list_del(&bridge->list); + mlx5_esw_bridge_mdb_cleanup(bridge); + rhashtable_destroy(&bridge->fdb_ht); + kvfree(bridge); + + if (list_empty(&br_offloads->bridges)) + mlx5_esw_bridge_ingress_table_cleanup(br_offloads); +} + +static struct mlx5_esw_bridge * +mlx5_esw_bridge_lookup(struct net_device *br_netdev, struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *bridge; + + ASSERT_RTNL(); + + list_for_each_entry(bridge, &br_offloads->bridges, list) { + if (bridge->ifindex == br_netdev->ifindex) { + mlx5_esw_bridge_get(bridge); + return bridge; + } + } + + if (!br_offloads->ingress_ft) { + int err = mlx5_esw_bridge_ingress_table_init(br_offloads); + + if (err) + return ERR_PTR(err); + } + + bridge = mlx5_esw_bridge_create(br_netdev, br_offloads); + if (IS_ERR(bridge) && list_empty(&br_offloads->bridges)) + mlx5_esw_bridge_ingress_table_cleanup(br_offloads); + return bridge; +} + +static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_owner_vhca_id) +{ + return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE; +} + +unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port) +{ + return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id); +} + +static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + return xa_insert(&br_offloads->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL); +} + +static struct mlx5_esw_bridge_port * +mlx5_esw_bridge_port_lookup(u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + return xa_load(&br_offloads->ports, mlx5_esw_bridge_port_key_from_data(vport_num, + esw_owner_vhca_id)); +} + +static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port)); +} + +static struct mlx5_esw_bridge * +mlx5_esw_bridge_from_port_lookup(u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return NULL; + + return port->bridge; +} + +static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry) +{ + trace_mlx5_esw_bridge_fdb_entry_refresh(entry); + + mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, + entry->key.vid, + SWITCHDEV_FDB_ADD_TO_BRIDGE); +} + +static void +mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry, + struct mlx5_esw_bridge *bridge) +{ + trace_mlx5_esw_bridge_fdb_entry_cleanup(entry); + + rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params); + mlx5_del_flow_rules(entry->egress_handle); + if (entry->filter_handle) + mlx5_del_flow_rules(entry->filter_handle); + mlx5_del_flow_rules(entry->ingress_handle); + mlx5_fc_destroy(bridge->br_offloads->esw->dev, entry->ingress_counter); + list_del(&entry->vlan_list); + list_del(&entry->list); + kvfree(entry); +} + +static void +mlx5_esw_bridge_fdb_entry_notify_and_cleanup(struct mlx5_esw_bridge_fdb_entry *entry, + struct mlx5_esw_bridge *bridge) +{ + mlx5_esw_bridge_fdb_del_notify(entry); + mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); +} + +static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); +} + +static struct mlx5_esw_bridge_vlan * +mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port) +{ + return xa_load(&port->vlans, vid); +} + +static int +mlx5_esw_bridge_vlan_push_create(u16 vlan_proto, struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_eswitch *esw) +{ + struct { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + } vlan_hdr = { htons(vlan_proto), htons(vlan->vid) }; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_pkt_reformat *pkt_reformat; + + if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_insert)) || + MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_size) < sizeof(vlan_hdr) || + MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_offset) < + offsetof(struct vlan_ethhdr, h_vlan_proto)) { + esw_warn(esw->dev, "Packet reformat INSERT_HEADER is not supported\n"); + return -EOPNOTSUPP; + } + + reformat_params.type = MLX5_REFORMAT_TYPE_INSERT_HDR; + reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START; + reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto); + reformat_params.size = sizeof(vlan_hdr); + reformat_params.data = &vlan_hdr; + pkt_reformat = mlx5_packet_reformat_alloc(esw->dev, + &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(pkt_reformat)) { + esw_warn(esw->dev, "Failed to alloc packet reformat INSERT_HEADER (err=%ld)\n", + PTR_ERR(pkt_reformat)); + return PTR_ERR(pkt_reformat); + } + + vlan->pkt_reformat_push = pkt_reformat; + return 0; +} + +static void +mlx5_esw_bridge_vlan_push_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_push); + vlan->pkt_reformat_push = NULL; +} + +static int +mlx5_esw_bridge_vlan_pop_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + struct mlx5_pkt_reformat *pkt_reformat; + + if (!mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) { + esw_warn(esw->dev, "Packet reformat REMOVE_HEADER is not supported\n"); + return -EOPNOTSUPP; + } + + pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw); + if (IS_ERR(pkt_reformat)) { + esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", + PTR_ERR(pkt_reformat)); + return PTR_ERR(pkt_reformat); + } + + vlan->pkt_reformat_pop = pkt_reformat; + return 0; +} + +static void +mlx5_esw_bridge_vlan_pop_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_pop); + vlan->pkt_reformat_pop = NULL; +} + +static int +mlx5_esw_bridge_vlan_push_mark_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_modify_hdr *pkt_mod_hdr; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(set_action_in, action, offset, 8); + MLX5_SET(set_action_in, action, length, ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS); + MLX5_SET(set_action_in, action, data, ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN); + + pkt_mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, 1, action); + if (IS_ERR(pkt_mod_hdr)) + return PTR_ERR(pkt_mod_hdr); + + vlan->pkt_mod_hdr_push_mark = pkt_mod_hdr; + return 0; +} + +static void +mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_modify_header_dealloc(esw->dev, vlan->pkt_mod_hdr_push_mark); + vlan->pkt_mod_hdr_push_mark = NULL; +} + +static int +mlx5_esw_bridge_vlan_push_pop_fhs_create(u16 vlan_proto, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + return mlx5_esw_bridge_vlan_mcast_init(vlan_proto, port, vlan); +} + +static void +mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(struct mlx5_esw_bridge_vlan *vlan) +{ + mlx5_esw_bridge_vlan_mcast_cleanup(vlan); +} + +static int +mlx5_esw_bridge_vlan_push_pop_create(u16 vlan_proto, u16 flags, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + int err; + + if (flags & BRIDGE_VLAN_INFO_PVID) { + err = mlx5_esw_bridge_vlan_push_create(vlan_proto, vlan, esw); + if (err) + return err; + + err = mlx5_esw_bridge_vlan_push_mark_create(vlan, esw); + if (err) + goto err_vlan_push_mark; + } + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) { + err = mlx5_esw_bridge_vlan_pop_create(vlan, esw); + if (err) + goto err_vlan_pop; + + err = mlx5_esw_bridge_vlan_push_pop_fhs_create(vlan_proto, port, vlan); + if (err) + goto err_vlan_pop_fhs; + } + + return 0; + +err_vlan_pop_fhs: + mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); +err_vlan_pop: + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); +err_vlan_push_mark: + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); + return err; +} + +static struct mlx5_esw_bridge_vlan * +mlx5_esw_bridge_vlan_create(u16 vlan_proto, u16 vid, u16 flags, struct mlx5_esw_bridge_port *port, + struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_vlan *vlan; + int err; + + vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return ERR_PTR(-ENOMEM); + + vlan->vid = vid; + vlan->flags = flags; + INIT_LIST_HEAD(&vlan->fdb_list); + + err = mlx5_esw_bridge_vlan_push_pop_create(vlan_proto, flags, port, vlan, esw); + if (err) + goto err_vlan_push_pop; + + err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL); + if (err) + goto err_xa_insert; + + trace_mlx5_esw_bridge_vlan_create(vlan); + return vlan; + +err_xa_insert: + if (vlan->mcast_handle) + mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(vlan); + if (vlan->pkt_reformat_pop) + mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); +err_vlan_push_pop: + kvfree(vlan); + return ERR_PTR(err); +} + +static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + xa_erase(&port->vlans, vlan->vid); +} + +static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_eswitch *esw = bridge->br_offloads->esw; + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); + mlx5_esw_bridge_port_mdb_vlan_flush(port, vlan); + + if (vlan->mcast_handle) + mlx5_esw_bridge_vlan_push_pop_fhs_cleanup(vlan); + if (vlan->pkt_reformat_pop) + mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); + if (vlan->pkt_mod_hdr_push_mark) + mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw); + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); +} + +static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_esw_bridge *bridge) +{ + trace_mlx5_esw_bridge_vlan_cleanup(vlan); + mlx5_esw_bridge_vlan_flush(port, vlan, bridge); + mlx5_esw_bridge_vlan_erase(port, vlan); + kvfree(vlan); +} + +static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_vlan *vlan; + unsigned long index; + + xa_for_each(&port->vlans, index, vlan) + mlx5_esw_bridge_vlan_cleanup(port, vlan, bridge); +} + +static int mlx5_esw_bridge_port_vlans_recreate(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_vlan *vlan; + unsigned long i; + int err; + + xa_for_each(&port->vlans, i, vlan) { + mlx5_esw_bridge_vlan_flush(port, vlan, bridge); + err = mlx5_esw_bridge_vlan_push_pop_create(bridge->vlan_proto, vlan->flags, port, + vlan, br_offloads->esw); + if (err) { + esw_warn(br_offloads->esw->dev, + "Failed to create VLAN=%u(proto=%x) push/pop actions (vport=%u,err=%d)\n", + vlan->vid, bridge->vlan_proto, port->vport_num, + err); + return err; + } + } + + return 0; +} + +static int +mlx5_esw_bridge_vlans_recreate(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_port *port; + unsigned long i; + int err; + + xa_for_each(&br_offloads->ports, i, port) { + if (port->bridge != bridge) + continue; + + err = mlx5_esw_bridge_port_vlans_recreate(port, bridge); + if (err) + return err; + } + + return 0; +} + +static struct mlx5_esw_bridge_vlan * +mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge_vlan *vlan; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, bridge->br_offloads); + if (!port) { + /* FDB is added asynchronously on wq while port might have been deleted + * concurrently. Report on 'info' logging level and skip the FDB offload. + */ + esw_info(esw->dev, "Failed to lookup bridge port (vport=%u)\n", vport_num); + return ERR_PTR(-EINVAL); + } + + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (!vlan) { + /* FDB is added asynchronously on wq while vlan might have been deleted + * concurrently. Report on 'info' logging level and skip the FDB offload. + */ + esw_info(esw->dev, "Failed to lookup bridge port vlan metadata (vport=%u)\n", + vport_num); + return ERR_PTR(-EINVAL); + } + + return vlan; +} + +static struct mlx5_esw_bridge_fdb_entry * +mlx5_esw_bridge_fdb_lookup(struct mlx5_esw_bridge *bridge, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge_fdb_key key = {}; + + ether_addr_copy(key.addr, addr); + key.vid = vid; + return rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params); +} + +static struct mlx5_esw_bridge_fdb_entry * +mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, bool added_by_user, bool peer, + struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_vlan *vlan = NULL; + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_flow_handle *handle; + struct mlx5_fc *counter; + int err; + + if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) { + vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, esw_owner_vhca_id, bridge, + esw); + if (IS_ERR(vlan)) + return ERR_CAST(vlan); + } + + entry = mlx5_esw_bridge_fdb_lookup(bridge, addr, vid); + if (entry) + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); + + entry = kvzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + ether_addr_copy(entry->key.addr, addr); + entry->key.vid = vid; + entry->dev = dev; + entry->vport_num = vport_num; + entry->esw_owner_vhca_id = esw_owner_vhca_id; + entry->lastuse = jiffies; + if (added_by_user) + entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER; + if (peer) + entry->flags |= MLX5_ESW_BRIDGE_FLAG_PEER; + + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_ingress_fc_create; + } + entry->ingress_counter = counter; + + handle = peer ? + mlx5_esw_bridge_ingress_flow_peer_create(vport_num, esw_owner_vhca_id, + addr, vlan, mlx5_fc_id(counter), + bridge) : + mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, + mlx5_fc_id(counter), bridge); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d,peer=%d)\n", + vport_num, err, peer); + goto err_ingress_flow_create; + } + entry->ingress_handle = handle; + + if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG) { + handle = mlx5_esw_bridge_ingress_filter_flow_create(vport_num, addr, bridge); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + esw_warn(esw->dev, "Failed to create ingress filter(vport=%u,err=%d)\n", + vport_num, err); + goto err_ingress_filter_flow_create; + } + entry->filter_handle = handle; + } + + handle = mlx5_esw_bridge_egress_flow_create(vport_num, esw_owner_vhca_id, addr, vlan, + bridge); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n", + vport_num, err); + goto err_egress_flow_create; + } + entry->egress_handle = handle; + + err = rhashtable_insert_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params); + if (err) { + esw_warn(esw->dev, "Failed to insert FDB flow(vport=%u,err=%d)\n", vport_num, err); + goto err_ht_init; + } + + if (vlan) + list_add(&entry->vlan_list, &vlan->fdb_list); + else + INIT_LIST_HEAD(&entry->vlan_list); + list_add(&entry->list, &bridge->fdb_list); + + trace_mlx5_esw_bridge_fdb_entry_init(entry); + return entry; + +err_ht_init: + mlx5_del_flow_rules(entry->egress_handle); +err_egress_flow_create: + if (entry->filter_handle) + mlx5_del_flow_rules(entry->filter_handle); +err_ingress_filter_flow_create: + mlx5_del_flow_rules(entry->ingress_handle); +err_ingress_flow_create: + mlx5_fc_destroy(esw->dev, entry->ingress_counter); +err_ingress_fc_create: + kvfree(entry); + return ERR_PTR(err); +} + +int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return -EINVAL; + + bridge->ageing_time = clock_t_to_jiffies(ageing_time); + return 0; +} + +int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *bridge; + bool filtering; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return -EINVAL; + + filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; + if (filtering == enable) + return 0; + + mlx5_esw_bridge_fdb_flush(bridge); + mlx5_esw_bridge_mdb_flush(bridge); + if (enable) + bridge->flags |= MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; + else + bridge->flags &= ~MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; + + return 0; +} + +int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, + br_offloads); + if (!bridge) + return -EINVAL; + + if (bridge->vlan_proto == proto) + return 0; + if (proto != ETH_P_8021Q && proto != ETH_P_8021AD) { + esw_warn(br_offloads->esw->dev, "Can't set unsupported VLAN protocol %x", proto); + return -EOPNOTSUPP; + } + + mlx5_esw_bridge_fdb_flush(bridge); + mlx5_esw_bridge_mdb_flush(bridge); + bridge->vlan_proto = proto; + mlx5_esw_bridge_vlans_recreate(bridge); + + return 0; +} + +int mlx5_esw_bridge_mcast_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_esw_bridge *bridge; + int err = 0; + bool mcast; + + if (!(MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_multi_path_any_table) || + MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_multi_path_any_table_limit_regc)) || + !MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_uplink_hairpin) || + !MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) + return -EOPNOTSUPP; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return -EINVAL; + + mcast = bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG; + if (mcast == enable) + return 0; + + if (enable) + err = mlx5_esw_bridge_mcast_enable(bridge); + else + mlx5_esw_bridge_mcast_disable(bridge); + + return err; +} + +static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_esw_bridge_port *port; + int err; + + port = kvzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + port->vport_num = vport_num; + port->esw_owner_vhca_id = esw_owner_vhca_id; + port->bridge = bridge; + port->flags |= flags; + xa_init(&port->vlans); + + err = mlx5_esw_bridge_port_mcast_init(port); + if (err) { + esw_warn(esw->dev, + "Failed to initialize port multicast (vport=%u,esw_owner_vhca_id=%u,err=%d)\n", + port->vport_num, port->esw_owner_vhca_id, err); + goto err_port_mcast; + } + + err = mlx5_esw_bridge_port_insert(port, br_offloads); + if (err) { + esw_warn(esw->dev, + "Failed to insert port metadata (vport=%u,esw_owner_vhca_id=%u,err=%d)\n", + port->vport_num, port->esw_owner_vhca_id, err); + goto err_port_insert; + } + trace_mlx5_esw_bridge_vport_init(port); + + return 0; + +err_port_insert: + mlx5_esw_bridge_port_mcast_cleanup(port); +err_port_mcast: + kvfree(port); + return err; +} + +static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge_port *port) +{ + u16 vport_num = port->vport_num, esw_owner_vhca_id = port->esw_owner_vhca_id; + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) + if (entry->vport_num == vport_num && entry->esw_owner_vhca_id == esw_owner_vhca_id) + mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); + + trace_mlx5_esw_bridge_vport_cleanup(port); + mlx5_esw_bridge_port_vlans_flush(port, bridge); + mlx5_esw_bridge_port_mcast_cleanup(port); + mlx5_esw_bridge_port_erase(port, br_offloads); + kvfree(port); + mlx5_esw_bridge_put(br_offloads, bridge); + return 0; +} + +static int mlx5_esw_bridge_vport_link_with_flags(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge *bridge; + int err; + + bridge = mlx5_esw_bridge_lookup(br_netdev, br_offloads); + if (IS_ERR(bridge)) { + NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex"); + return PTR_ERR(bridge); + } + + err = mlx5_esw_bridge_vport_init(vport_num, esw_owner_vhca_id, flags, br_offloads, bridge); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error initializing port"); + goto err_vport; + } + return 0; + +err_vport: + mlx5_esw_bridge_put(br_offloads, bridge); + return err; +} + +int mlx5_esw_bridge_vport_link(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + return mlx5_esw_bridge_vport_link_with_flags(br_netdev, vport_num, esw_owner_vhca_id, 0, + br_offloads, extack); +} + +int mlx5_esw_bridge_vport_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge_port *port; + int err; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) { + NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge"); + return -EINVAL; + } + if (port->bridge->ifindex != br_netdev->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge"); + return -EINVAL; + } + + err = mlx5_esw_bridge_vport_cleanup(br_offloads, port); + if (err) + NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed"); + return err; +} + +int mlx5_esw_bridge_vport_peer_link(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + if (!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch)) + return 0; + + return mlx5_esw_bridge_vport_link_with_flags(br_netdev, vport_num, esw_owner_vhca_id, + MLX5_ESW_BRIDGE_PORT_FLAG_PEER, + br_offloads, extack); +} + +int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + return mlx5_esw_bridge_vport_unlink(br_netdev, vport_num, esw_owner_vhca_id, br_offloads, + extack); +} + +int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge_vlan *vlan; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return -EINVAL; + + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (vlan) { + if (vlan->flags == flags) + return 0; + mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); + } + + vlan = mlx5_esw_bridge_vlan_create(port->bridge->vlan_proto, vid, flags, port, + br_offloads->esw); + if (IS_ERR(vlan)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry"); + return PTR_ERR(vlan); + } + return 0; +} + +void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge_vlan *vlan; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (!vlan) + return; + mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); +} + +void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return; + + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB update entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + entry->lastuse = jiffies; +} + +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return; + + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB mark deleted entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + entry->flags |= MLX5_ESW_BRIDGE_FLAG_DELETED; +} + +void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + bridge = port->bridge; + entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, esw_owner_vhca_id, fdb_info->addr, + fdb_info->vid, fdb_info->added_by_user, + port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER, + br_offloads->esw, bridge); + if (IS_ERR(entry)) + return; + + if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) + mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, + SWITCHDEV_FDB_OFFLOADED); + else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER)) + /* Take over dynamic entries to prevent kernel bridge from aging them out. */ + mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, + SWITCHDEV_FDB_ADD_TO_BRIDGE); +} + +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return; + + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_debug(esw->dev, + "FDB remove entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); +} + +void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + struct mlx5_esw_bridge *bridge; + + list_for_each_entry(bridge, &br_offloads->bridges, list) { + list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) { + unsigned long lastuse = + (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter); + + if (entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | + MLX5_ESW_BRIDGE_FLAG_DELETED)) + continue; + + if (time_after(lastuse, entry->lastuse)) + mlx5_esw_bridge_fdb_entry_refresh(entry); + else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER) && + time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) + mlx5_esw_bridge_fdb_entry_notify_and_cleanup(entry, bridge); + } + } +} + +int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge_vlan *vlan; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + int err; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) { + esw_warn(br_offloads->esw->dev, + "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n", + addr, vport_num); + NL_SET_ERR_MSG_FMT_MOD(extack, + "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n", + addr, vport_num); + return -EINVAL; + } + + bridge = port->bridge; + if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) { + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (!vlan) { + esw_warn(br_offloads->esw->dev, + "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, vport_num); + NL_SET_ERR_MSG_FMT_MOD(extack, + "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, vport_num); + return -EINVAL; + } + } + + err = mlx5_esw_bridge_port_mdb_attach(dev, port, addr, vid); + if (err) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Failed to add MDB (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, vport_num); + return err; + } + + return 0; +} + +void mlx5_esw_bridge_port_mdb_del(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + mlx5_esw_bridge_port_mdb_detach(dev, port, addr, vid); +} + +static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge_port *port; + unsigned long i; + + xa_for_each(&br_offloads->ports, i, port) + mlx5_esw_bridge_vport_cleanup(br_offloads, port); + + WARN_ONCE(!list_empty(&br_offloads->bridges), + "Cleaning up bridge offloads while still having bridges attached\n"); +} + +struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_offloads *br_offloads; + + ASSERT_RTNL(); + + br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL); + if (!br_offloads) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&br_offloads->bridges); + xa_init(&br_offloads->ports); + br_offloads->esw = esw; + esw->br_offloads = br_offloads; + mlx5_esw_bridge_debugfs_offloads_init(br_offloads); + + return br_offloads; +} + +void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads; + + ASSERT_RTNL(); + + if (!br_offloads) + return; + + mlx5_esw_bridge_flush(br_offloads); + WARN_ON(!xa_empty(&br_offloads->ports)); + mlx5_esw_bridge_debugfs_offloads_cleanup(br_offloads); + + esw->br_offloads = NULL; + kvfree(br_offloads); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h new file mode 100644 index 0000000000..d6f5391619 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_ESW_BRIDGE_H__ +#define __MLX5_ESW_BRIDGE_H__ + +#include +#include +#include +#include +#include "eswitch.h" + +struct dentry; +struct mlx5_flow_table; +struct mlx5_flow_group; + +struct mlx5_esw_bridge_offloads { + struct mlx5_eswitch *esw; + struct list_head bridges; + struct xarray ports; + struct dentry *debugfs_root; + + struct notifier_block netdev_nb; + struct notifier_block nb_blk; + struct notifier_block nb; + struct workqueue_struct *wq; + struct delayed_work update_work; + + struct mlx5_flow_table *ingress_ft; + struct mlx5_flow_group *ingress_igmp_fg; + struct mlx5_flow_group *ingress_mld_fg; + struct mlx5_flow_group *ingress_vlan_fg; + struct mlx5_flow_group *ingress_vlan_filter_fg; + struct mlx5_flow_group *ingress_qinq_fg; + struct mlx5_flow_group *ingress_qinq_filter_fg; + struct mlx5_flow_group *ingress_mac_fg; + + struct mlx5_flow_handle *igmp_handle; + struct mlx5_flow_handle *mld_query_handle; + struct mlx5_flow_handle *mld_report_handle; + struct mlx5_flow_handle *mld_done_handle; + + struct mlx5_flow_table *skip_ft; +}; + +struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw); +void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw); +int mlx5_esw_bridge_vport_link(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_unlink(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_peer_link(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_vlan_proto_set(u16 vport_num, u16 esw_owner_vhca_id, u16 proto, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_mcast_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads); + +int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_port_mdb_del(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads); + +#endif /* __MLX5_ESW_BRIDGE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c new file mode 100644 index 0000000000..dbd7cbe6cb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include "bridge.h" +#include "bridge_priv.h" + +static void *mlx5_esw_bridge_debugfs_start(struct seq_file *seq, loff_t *pos); +static void *mlx5_esw_bridge_debugfs_next(struct seq_file *seq, void *v, loff_t *pos); +static void mlx5_esw_bridge_debugfs_stop(struct seq_file *seq, void *v); +static int mlx5_esw_bridge_debugfs_show(struct seq_file *seq, void *v); + +static const struct seq_operations mlx5_esw_bridge_debugfs_sops = { + .start = mlx5_esw_bridge_debugfs_start, + .next = mlx5_esw_bridge_debugfs_next, + .stop = mlx5_esw_bridge_debugfs_stop, + .show = mlx5_esw_bridge_debugfs_show, +}; +DEFINE_SEQ_ATTRIBUTE(mlx5_esw_bridge_debugfs); + +static void *mlx5_esw_bridge_debugfs_start(struct seq_file *seq, loff_t *pos) +{ + struct mlx5_esw_bridge *bridge = seq->private; + + rtnl_lock(); + return *pos ? seq_list_start(&bridge->fdb_list, *pos - 1) : SEQ_START_TOKEN; +} + +static void *mlx5_esw_bridge_debugfs_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct mlx5_esw_bridge *bridge = seq->private; + + return seq_list_next(v == SEQ_START_TOKEN ? &bridge->fdb_list : v, &bridge->fdb_list, pos); +} + +static void mlx5_esw_bridge_debugfs_stop(struct seq_file *seq, void *v) +{ + rtnl_unlock(); +} + +static int mlx5_esw_bridge_debugfs_show(struct seq_file *seq, void *v) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + u64 packets, bytes, lastuse; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-16s %-17s %4s %20s %20s %20s %5s\n", + "DEV", "MAC", "VLAN", "PACKETS", "BYTES", "LASTUSE", "FLAGS"); + return 0; + } + + entry = list_entry(v, struct mlx5_esw_bridge_fdb_entry, list); + mlx5_fc_query_cached_raw(entry->ingress_counter, &bytes, &packets, &lastuse); + seq_printf(seq, "%-16s %-17pM %4d %20llu %20llu %20llu %#5x\n", + entry->dev->name, entry->key.addr, entry->key.vid, packets, bytes, lastuse, + entry->flags); + return 0; +} + +void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_bridge *bridge) +{ + if (!bridge->br_offloads->debugfs_root) + return; + + bridge->debugfs_dir = debugfs_create_dir(br_netdev->name, + bridge->br_offloads->debugfs_root); + debugfs_create_file("fdb", 0400, bridge->debugfs_dir, bridge, + &mlx5_esw_bridge_debugfs_fops); +} + +void mlx5_esw_bridge_debugfs_cleanup(struct mlx5_esw_bridge *bridge) +{ + debugfs_remove_recursive(bridge->debugfs_dir); + bridge->debugfs_dir = NULL; +} + +void mlx5_esw_bridge_debugfs_offloads_init(struct mlx5_esw_bridge_offloads *br_offloads) +{ + if (!br_offloads->esw->debugfs_root) + return; + + br_offloads->debugfs_root = debugfs_create_dir("bridge", br_offloads->esw->debugfs_root); +} + +void mlx5_esw_bridge_debugfs_offloads_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + debugfs_remove_recursive(br_offloads->debugfs_root); + br_offloads->debugfs_root = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c new file mode 100644 index 0000000000..22dd30cf80 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c @@ -0,0 +1,1134 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "lib/devcom.h" +#include "bridge.h" +#include "eswitch.h" +#include "bridge_priv.h" +#include "diag/bridge_tracepoint.h" + +static const struct rhashtable_params mdb_ht_params = { + .key_offset = offsetof(struct mlx5_esw_bridge_mdb_entry, key), + .key_len = sizeof(struct mlx5_esw_bridge_mdb_key), + .head_offset = offsetof(struct mlx5_esw_bridge_mdb_entry, ht_node), + .automatic_shrinking = true, +}; + +int mlx5_esw_bridge_mdb_init(struct mlx5_esw_bridge *bridge) +{ + INIT_LIST_HEAD(&bridge->mdb_list); + return rhashtable_init(&bridge->mdb_ht, &mdb_ht_params); +} + +void mlx5_esw_bridge_mdb_cleanup(struct mlx5_esw_bridge *bridge) +{ + rhashtable_destroy(&bridge->mdb_ht); +} + +static struct mlx5_esw_bridge_port * +mlx5_esw_bridge_mdb_port_lookup(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + return xa_load(&entry->ports, mlx5_esw_bridge_port_key(port)); +} + +static int mlx5_esw_bridge_mdb_port_insert(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + int err = xa_insert(&entry->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL); + + if (!err) + entry->num_ports++; + return err; +} + +static void mlx5_esw_bridge_mdb_port_remove(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + xa_erase(&entry->ports, mlx5_esw_bridge_port_key(port)); + entry->num_ports--; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_mdb_entry *entry, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND | FLOW_ACT_IGNORE_FLOW_LEVEL, + }; + int num_dests = entry->num_ports, i = 0; + struct mlx5_flow_destination *dests; + struct mlx5_esw_bridge_port *port; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *dmac_v, *dmac_c; + unsigned long idx; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + dests = kvcalloc(num_dests, sizeof(*dests), GFP_KERNEL); + if (!dests) { + kvfree(rule_spec); + return ERR_PTR(-ENOMEM); + } + + xa_for_each(&entry->ports, idx, port) { + dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dests[i].ft = port->mcast.ft; + if (port->vport_num == MLX5_VPORT_UPLINK) + dests[i].ft->flags |= MLX5_FLOW_TABLE_UPLINK_VPORT; + i++; + } + + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, outer_headers.dmac_47_16); + ether_addr_copy(dmac_v, entry->key.addr); + dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, outer_headers.dmac_47_16); + eth_broadcast_addr(dmac_c); + + if (entry->key.vid) { + if (bridge->vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (bridge->vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, + entry->key.vid); + } + + handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, dests, num_dests); + + kvfree(dests); + kvfree(rule_spec); + return handle; +} + +static int +mlx5_esw_bridge_port_mdb_offload(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + struct mlx5_flow_handle *handle; + + handle = mlx5_esw_bridge_mdb_flow_create(port->esw_owner_vhca_id, entry, port->bridge); + if (entry->egress_handle) { + mlx5_del_flow_rules(entry->egress_handle); + entry->egress_handle = NULL; + } + if (IS_ERR(handle)) + return PTR_ERR(handle); + + entry->egress_handle = handle; + return 0; +} + +static struct mlx5_esw_bridge_mdb_entry * +mlx5_esw_bridge_mdb_lookup(struct mlx5_esw_bridge *bridge, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge_mdb_key key = {}; + + ether_addr_copy(key.addr, addr); + key.vid = vid; + return rhashtable_lookup_fast(&bridge->mdb_ht, &key, mdb_ht_params); +} + +static struct mlx5_esw_bridge_mdb_entry * +mlx5_esw_bridge_port_mdb_entry_init(struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry; + int err; + + entry = kvzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + ether_addr_copy(entry->key.addr, addr); + entry->key.vid = vid; + xa_init(&entry->ports); + err = rhashtable_insert_fast(&bridge->mdb_ht, &entry->ht_node, mdb_ht_params); + if (err) + goto err_ht_insert; + + list_add(&entry->list, &bridge->mdb_list); + + return entry; + +err_ht_insert: + xa_destroy(&entry->ports); + kvfree(entry); + return ERR_PTR(err); +} + +static void mlx5_esw_bridge_port_mdb_entry_cleanup(struct mlx5_esw_bridge *bridge, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + if (entry->egress_handle) + mlx5_del_flow_rules(entry->egress_handle); + list_del(&entry->list); + rhashtable_remove_fast(&bridge->mdb_ht, &entry->ht_node, mdb_ht_params); + xa_destroy(&entry->ports); + kvfree(entry); +} + +int mlx5_esw_bridge_port_mdb_attach(struct net_device *dev, struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry; + int err; + + if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG)) + return -EOPNOTSUPP; + + entry = mlx5_esw_bridge_mdb_lookup(bridge, addr, vid); + if (entry) { + if (mlx5_esw_bridge_mdb_port_lookup(port, entry)) { + esw_warn(bridge->br_offloads->esw->dev, "MDB attach entry is already attached to port (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, port->vport_num); + return 0; + } + } else { + entry = mlx5_esw_bridge_port_mdb_entry_init(port, addr, vid); + if (IS_ERR(entry)) { + err = PTR_ERR(entry); + esw_warn(bridge->br_offloads->esw->dev, "MDB attach failed to init entry (MAC=%pM,vid=%u,vport=%u,err=%d)\n", + addr, vid, port->vport_num, err); + return err; + } + } + + err = mlx5_esw_bridge_mdb_port_insert(port, entry); + if (err) { + if (!entry->num_ports) + mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry); /* new mdb entry */ + esw_warn(bridge->br_offloads->esw->dev, + "MDB attach failed to insert port (MAC=%pM,vid=%u,vport=%u,err=%d)\n", + addr, vid, port->vport_num, err); + return err; + } + + err = mlx5_esw_bridge_port_mdb_offload(port, entry); + if (err) + /* Single mdb can be used by multiple ports, so just log the + * error and continue. + */ + esw_warn(bridge->br_offloads->esw->dev, "MDB attach failed to offload (MAC=%pM,vid=%u,vport=%u,err=%d)\n", + addr, vid, port->vport_num, err); + + trace_mlx5_esw_bridge_port_mdb_attach(dev, entry); + return 0; +} + +static void mlx5_esw_bridge_port_mdb_entry_detach(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_mdb_entry *entry) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + int err; + + mlx5_esw_bridge_mdb_port_remove(port, entry); + if (!entry->num_ports) { + mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry); + return; + } + + err = mlx5_esw_bridge_port_mdb_offload(port, entry); + if (err) + /* Single mdb can be used by multiple ports, so just log the + * error and continue. + */ + esw_warn(bridge->br_offloads->esw->dev, "MDB detach failed to offload (MAC=%pM,vid=%u,vport=%u)\n", + entry->key.addr, entry->key.vid, port->vport_num); +} + +void mlx5_esw_bridge_port_mdb_detach(struct net_device *dev, struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry; + + entry = mlx5_esw_bridge_mdb_lookup(bridge, addr, vid); + if (!entry) { + esw_debug(bridge->br_offloads->esw->dev, + "MDB detach entry not found (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, port->vport_num); + return; + } + + if (!mlx5_esw_bridge_mdb_port_lookup(port, entry)) { + esw_debug(bridge->br_offloads->esw->dev, + "MDB detach entry not attached to the port (MAC=%pM,vid=%u,vport=%u)\n", + addr, vid, port->vport_num); + return; + } + + trace_mlx5_esw_bridge_port_mdb_detach(dev, entry); + mlx5_esw_bridge_port_mdb_entry_detach(port, entry); +} + +void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list) + if (entry->key.vid == vlan->vid && mlx5_esw_bridge_mdb_port_lookup(port, entry)) + mlx5_esw_bridge_port_mdb_entry_detach(port, entry); +} + +static void mlx5_esw_bridge_port_mdb_flush(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_esw_bridge_mdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list) + if (mlx5_esw_bridge_mdb_port_lookup(port, entry)) + mlx5_esw_bridge_port_mdb_entry_detach(port, entry); +} + +void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_mdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->mdb_list, list) + mlx5_esw_bridge_port_mdb_entry_cleanup(bridge, entry); +} +static int mlx5_esw_bridge_port_mcast_fts_init(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_eswitch *esw = bridge->br_offloads->esw; + struct mlx5_flow_table *mcast_ft; + + mcast_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE, + MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE, + esw); + if (IS_ERR(mcast_ft)) + return PTR_ERR(mcast_ft); + + port->mcast.ft = mcast_ft; + return 0; +} + +static void mlx5_esw_bridge_port_mcast_fts_cleanup(struct mlx5_esw_bridge_port *port) +{ + if (port->mcast.ft) + mlx5_destroy_flow_table(port->mcast.ft); + port->mcast.ft = NULL; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_filter_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *mcast_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO); + + fg = mlx5_create_flow_group(mcast_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create filter flow group for bridge mcast table (err=%pe)\n", + fg); + + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_vlan_proto_fg_create(unsigned int from, unsigned int to, u16 vlan_proto, + struct mlx5_eswitch *esw, + struct mlx5_flow_table *mcast_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + if (vlan_proto == ETH_P_8021Q) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + else if (vlan_proto == ETH_P_8021AD) + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); + + MLX5_SET(create_flow_group_in, in, start_flow_index, from); + MLX5_SET(create_flow_group_in, in, end_flow_index, to); + + fg = mlx5_create_flow_group(mcast_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create VLAN(proto=%x) flow group for bridge mcast table (err=%pe)\n", + vlan_proto, fg); + + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *mcast_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO; + + return mlx5_esw_bridge_mcast_vlan_proto_fg_create(from, to, ETH_P_8021Q, esw, mcast_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_qinq_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *mcast_ft) +{ + unsigned int from = MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM; + unsigned int to = MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO; + + return mlx5_esw_bridge_mcast_vlan_proto_fg_create(from, to, ETH_P_8021AD, esw, mcast_ft); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_mcast_fwd_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *mcast_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO); + + fg = mlx5_create_flow_group(mcast_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create forward flow group for bridge mcast table (err=%pe)\n", + fg); + + return fg; +} + +static int mlx5_esw_bridge_port_mcast_fgs_init(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_flow_group *fwd_fg, *qinq_fg, *vlan_fg, *filter_fg; + struct mlx5_eswitch *esw = port->bridge->br_offloads->esw; + struct mlx5_flow_table *mcast_ft = port->mcast.ft; + int err; + + filter_fg = mlx5_esw_bridge_mcast_filter_fg_create(esw, mcast_ft); + if (IS_ERR(filter_fg)) + return PTR_ERR(filter_fg); + + vlan_fg = mlx5_esw_bridge_mcast_vlan_fg_create(esw, mcast_ft); + if (IS_ERR(vlan_fg)) { + err = PTR_ERR(vlan_fg); + goto err_vlan_fg; + } + + qinq_fg = mlx5_esw_bridge_mcast_qinq_fg_create(esw, mcast_ft); + if (IS_ERR(qinq_fg)) { + err = PTR_ERR(qinq_fg); + goto err_qinq_fg; + } + + fwd_fg = mlx5_esw_bridge_mcast_fwd_fg_create(esw, mcast_ft); + if (IS_ERR(fwd_fg)) { + err = PTR_ERR(fwd_fg); + goto err_fwd_fg; + } + + port->mcast.filter_fg = filter_fg; + port->mcast.vlan_fg = vlan_fg; + port->mcast.qinq_fg = qinq_fg; + port->mcast.fwd_fg = fwd_fg; + + return 0; + +err_fwd_fg: + mlx5_destroy_flow_group(qinq_fg); +err_qinq_fg: + mlx5_destroy_flow_group(vlan_fg); +err_vlan_fg: + mlx5_destroy_flow_group(filter_fg); + return err; +} + +static void mlx5_esw_bridge_port_mcast_fgs_cleanup(struct mlx5_esw_bridge_port *port) +{ + if (port->mcast.fwd_fg) + mlx5_destroy_flow_group(port->mcast.fwd_fg); + port->mcast.fwd_fg = NULL; + if (port->mcast.qinq_fg) + mlx5_destroy_flow_group(port->mcast.qinq_fg); + port->mcast.qinq_fg = NULL; + if (port->mcast.vlan_fg) + mlx5_destroy_flow_group(port->mcast.vlan_fg); + port->mcast.vlan_fg = NULL; + if (port->mcast.filter_fg) + mlx5_destroy_flow_group(port->mcast.filter_fg); + port->mcast.filter_fg = NULL; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_flow_with_esw_create(struct mlx5_esw_bridge_port *port, + struct mlx5_eswitch *esw) +{ + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_DROP, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, port->vport_num)); + + handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, NULL, 0); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_filter_flow_create(struct mlx5_esw_bridge_port *port) +{ + return mlx5_esw_bridge_mcast_flow_with_esw_create(port, port->bridge->br_offloads->esw); +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_devcom_comp_dev *devcom = port->bridge->br_offloads->esw->devcom, *pos; + struct mlx5_eswitch *tmp, *peer_esw = NULL; + static struct mlx5_flow_handle *handle; + + if (!mlx5_devcom_for_each_peer_begin(devcom)) + return ERR_PTR(-ENODEV); + + mlx5_devcom_for_each_peer_entry(devcom, tmp, pos) { + if (mlx5_esw_is_owner(tmp, port->vport_num, port->esw_owner_vhca_id)) { + peer_esw = tmp; + break; + } + } + + if (!peer_esw) { + handle = ERR_PTR(-ENODEV); + goto out; + } + + handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw); + +out: + mlx5_devcom_for_each_peer_end(devcom); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_VPORT, + .vport.num = port->vport_num, + }; + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.pkt_reformat = vlan->pkt_reformat_pop; + + if (vlan_proto == ETH_P_8021Q) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + } else if (vlan_proto == ETH_P_8021AD) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.svlan_tag); + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, vlan->vid); + + if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + dest.vport.vhca_id = port->esw_owner_vhca_id; + } + handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +int mlx5_esw_bridge_vlan_mcast_init(u16 vlan_proto, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan) +{ + struct mlx5_flow_handle *handle; + + if (!(port->bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG)) + return 0; + + handle = mlx5_esw_bridge_mcast_vlan_flow_create(vlan_proto, port, vlan); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + vlan->mcast_handle = handle; + return 0; +} + +void mlx5_esw_bridge_vlan_mcast_cleanup(struct mlx5_esw_bridge_vlan *vlan) +{ + if (vlan->mcast_handle) + mlx5_del_flow_rules(vlan->mcast_handle); + vlan->mcast_handle = NULL; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_VPORT, + .vport.num = port->vport_num, + }; + struct mlx5_esw_bridge *bridge = port->bridge; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + dest.vport.vhca_id = port->esw_owner_vhca_id; + } + rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN; + handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static int mlx5_esw_bridge_port_mcast_fhs_init(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_flow_handle *filter_handle, *fwd_handle; + struct mlx5_esw_bridge_vlan *vlan, *failed; + unsigned long index; + int err; + + + filter_handle = (port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) ? + mlx5_esw_bridge_mcast_filter_flow_peer_create(port) : + mlx5_esw_bridge_mcast_filter_flow_create(port); + if (IS_ERR(filter_handle)) + return PTR_ERR(filter_handle); + + fwd_handle = mlx5_esw_bridge_mcast_fwd_flow_create(port); + if (IS_ERR(fwd_handle)) { + err = PTR_ERR(fwd_handle); + goto err_fwd; + } + + xa_for_each(&port->vlans, index, vlan) { + err = mlx5_esw_bridge_vlan_mcast_init(port->bridge->vlan_proto, port, vlan); + if (err) { + failed = vlan; + goto err_vlan; + } + } + + port->mcast.filter_handle = filter_handle; + port->mcast.fwd_handle = fwd_handle; + + return 0; + +err_vlan: + xa_for_each(&port->vlans, index, vlan) { + if (vlan == failed) + break; + + mlx5_esw_bridge_vlan_mcast_cleanup(vlan); + } + mlx5_del_flow_rules(fwd_handle); +err_fwd: + mlx5_del_flow_rules(filter_handle); + return err; +} + +static void mlx5_esw_bridge_port_mcast_fhs_cleanup(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_esw_bridge_vlan *vlan; + unsigned long index; + + xa_for_each(&port->vlans, index, vlan) + mlx5_esw_bridge_vlan_mcast_cleanup(vlan); + + if (port->mcast.fwd_handle) + mlx5_del_flow_rules(port->mcast.fwd_handle); + port->mcast.fwd_handle = NULL; + if (port->mcast.filter_handle) + mlx5_del_flow_rules(port->mcast.filter_handle); + port->mcast.filter_handle = NULL; +} + +int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port) +{ + struct mlx5_esw_bridge *bridge = port->bridge; + int err; + + if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG)) + return 0; + + err = mlx5_esw_bridge_port_mcast_fts_init(port, bridge); + if (err) + return err; + + err = mlx5_esw_bridge_port_mcast_fgs_init(port); + if (err) + goto err_fgs; + + err = mlx5_esw_bridge_port_mcast_fhs_init(port); + if (err) + goto err_fhs; + return err; + +err_fhs: + mlx5_esw_bridge_port_mcast_fgs_cleanup(port); +err_fgs: + mlx5_esw_bridge_port_mcast_fts_cleanup(port); + return err; +} + +void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port) +{ + mlx5_esw_bridge_port_mdb_flush(port); + mlx5_esw_bridge_port_mcast_fhs_cleanup(port); + mlx5_esw_bridge_port_mcast_fgs_cleanup(port); + mlx5_esw_bridge_port_mcast_fts_cleanup(port); +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_igmp_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_version); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_protocol); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO); + + fg = mlx5_create_flow_group(ingress_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create IGMP flow group for bridge ingress table (err=%pe)\n", + fg); + + return fg; +} + +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_mld_fg_create(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + if (!(MLX5_CAP_GEN(esw->dev, flex_parser_protocols) & MLX5_FLEX_PROTO_ICMPV6)) { + esw_warn(esw->dev, + "Can't create MLD flow group due to missing hardware ICMPv6 parsing support\n"); + return NULL; + } + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_3); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ip_version); + MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters_3.icmpv6_type); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO); + + fg = mlx5_create_flow_group(ingress_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create MLD flow group for bridge ingress table (err=%pe)\n", + fg); + + return fg; +} + +static int +mlx5_esw_bridge_ingress_mcast_fgs_init(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_flow_table *ingress_ft = br_offloads->ingress_ft; + struct mlx5_eswitch *esw = br_offloads->esw; + struct mlx5_flow_group *igmp_fg, *mld_fg; + + igmp_fg = mlx5_esw_bridge_ingress_igmp_fg_create(esw, ingress_ft); + if (IS_ERR(igmp_fg)) + return PTR_ERR(igmp_fg); + + mld_fg = mlx5_esw_bridge_ingress_mld_fg_create(esw, ingress_ft); + if (IS_ERR(mld_fg)) { + mlx5_destroy_flow_group(igmp_fg); + return PTR_ERR(mld_fg); + } + + br_offloads->ingress_igmp_fg = igmp_fg; + br_offloads->ingress_mld_fg = mld_fg; + return 0; +} + +static void +mlx5_esw_bridge_ingress_mcast_fgs_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + if (br_offloads->ingress_mld_fg) + mlx5_destroy_flow_group(br_offloads->ingress_mld_fg); + br_offloads->ingress_mld_fg = NULL; + if (br_offloads->ingress_igmp_fg) + mlx5_destroy_flow_group(br_offloads->ingress_igmp_fg); + br_offloads->ingress_igmp_fg = NULL; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_igmp_fh_create(struct mlx5_flow_table *ingress_ft, + struct mlx5_flow_table *skip_ft) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 4); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_protocol, IPPROTO_IGMP); + + handle = mlx5_add_flow_rules(ingress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_mld_fh_create(u8 type, struct mlx5_flow_table *ingress_ft, + struct mlx5_flow_table *skip_ft) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = skip_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_3; + + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, 6); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, misc_parameters_3.icmpv6_type); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_3.icmpv6_type, type); + + handle = mlx5_add_flow_rules(ingress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static int +mlx5_esw_bridge_ingress_mcast_fhs_create(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_flow_handle *igmp_handle, *mld_query_handle, *mld_report_handle, + *mld_done_handle; + struct mlx5_flow_table *ingress_ft = br_offloads->ingress_ft, + *skip_ft = br_offloads->skip_ft; + int err; + + igmp_handle = mlx5_esw_bridge_ingress_igmp_fh_create(ingress_ft, skip_ft); + if (IS_ERR(igmp_handle)) + return PTR_ERR(igmp_handle); + + if (br_offloads->ingress_mld_fg) { + mld_query_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_QUERY, + ingress_ft, + skip_ft); + if (IS_ERR(mld_query_handle)) { + err = PTR_ERR(mld_query_handle); + goto err_mld_query; + } + + mld_report_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_REPORT, + ingress_ft, + skip_ft); + if (IS_ERR(mld_report_handle)) { + err = PTR_ERR(mld_report_handle); + goto err_mld_report; + } + + mld_done_handle = mlx5_esw_bridge_ingress_mld_fh_create(ICMPV6_MGM_REDUCTION, + ingress_ft, + skip_ft); + if (IS_ERR(mld_done_handle)) { + err = PTR_ERR(mld_done_handle); + goto err_mld_done; + } + } else { + mld_query_handle = NULL; + mld_report_handle = NULL; + mld_done_handle = NULL; + } + + br_offloads->igmp_handle = igmp_handle; + br_offloads->mld_query_handle = mld_query_handle; + br_offloads->mld_report_handle = mld_report_handle; + br_offloads->mld_done_handle = mld_done_handle; + + return 0; + +err_mld_done: + mlx5_del_flow_rules(mld_report_handle); +err_mld_report: + mlx5_del_flow_rules(mld_query_handle); +err_mld_query: + mlx5_del_flow_rules(igmp_handle); + return err; +} + +static void +mlx5_esw_bridge_ingress_mcast_fhs_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + if (br_offloads->mld_done_handle) + mlx5_del_flow_rules(br_offloads->mld_done_handle); + br_offloads->mld_done_handle = NULL; + if (br_offloads->mld_report_handle) + mlx5_del_flow_rules(br_offloads->mld_report_handle); + br_offloads->mld_report_handle = NULL; + if (br_offloads->mld_query_handle) + mlx5_del_flow_rules(br_offloads->mld_query_handle); + br_offloads->mld_query_handle = NULL; + if (br_offloads->igmp_handle) + mlx5_del_flow_rules(br_offloads->igmp_handle); + br_offloads->igmp_handle = NULL; +} + +static int mlx5_esw_brige_mcast_init(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_port *port, *failed; + unsigned long i; + int err; + + xa_for_each(&br_offloads->ports, i, port) { + if (port->bridge != bridge) + continue; + + err = mlx5_esw_bridge_port_mcast_init(port); + if (err) { + failed = port; + goto err_port; + } + } + return 0; + +err_port: + xa_for_each(&br_offloads->ports, i, port) { + if (port == failed) + break; + if (port->bridge != bridge) + continue; + + mlx5_esw_bridge_port_mcast_cleanup(port); + } + return err; +} + +static void mlx5_esw_brige_mcast_cleanup(struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_esw_bridge_port *port; + unsigned long i; + + xa_for_each(&br_offloads->ports, i, port) { + if (port->bridge != bridge) + continue; + + mlx5_esw_bridge_port_mcast_cleanup(port); + } +} + +static int mlx5_esw_brige_mcast_global_enable(struct mlx5_esw_bridge_offloads *br_offloads) +{ + int err; + + if (br_offloads->ingress_igmp_fg) + return 0; /* already enabled by another bridge */ + + err = mlx5_esw_bridge_ingress_mcast_fgs_init(br_offloads); + if (err) { + esw_warn(br_offloads->esw->dev, + "Failed to create global multicast flow groups (err=%d)\n", + err); + return err; + } + + err = mlx5_esw_bridge_ingress_mcast_fhs_create(br_offloads); + if (err) { + esw_warn(br_offloads->esw->dev, + "Failed to create global multicast flows (err=%d)\n", + err); + goto err_fhs; + } + + return 0; + +err_fhs: + mlx5_esw_bridge_ingress_mcast_fgs_cleanup(br_offloads); + return err; +} + +static void mlx5_esw_brige_mcast_global_disable(struct mlx5_esw_bridge_offloads *br_offloads) +{ + struct mlx5_esw_bridge *br; + + list_for_each_entry(br, &br_offloads->bridges, list) { + /* Ingress table is global, so only disable snooping when all + * bridges on esw have multicast disabled. + */ + if (br->flags & MLX5_ESW_BRIDGE_MCAST_FLAG) + return; + } + + mlx5_esw_bridge_ingress_mcast_fhs_cleanup(br_offloads); + mlx5_esw_bridge_ingress_mcast_fgs_cleanup(br_offloads); +} + +int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge) +{ + int err; + + err = mlx5_esw_brige_mcast_global_enable(bridge->br_offloads); + if (err) + return err; + + bridge->flags |= MLX5_ESW_BRIDGE_MCAST_FLAG; + + err = mlx5_esw_brige_mcast_init(bridge); + if (err) { + esw_warn(bridge->br_offloads->esw->dev, "Failed to enable multicast (err=%d)\n", + err); + bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG; + mlx5_esw_brige_mcast_global_disable(bridge->br_offloads); + } + return err; +} + +void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge) +{ + mlx5_esw_brige_mcast_cleanup(bridge); + bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG; + mlx5_esw_brige_mcast_global_disable(bridge->br_offloads); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h new file mode 100644 index 0000000000..7c251af566 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef _MLX5_ESW_BRIDGE_PRIVATE_ +#define _MLX5_ESW_BRIDGE_PRIVATE_ + +#include +#include +#include +#include +#include +#include +#include "fs_core.h" + +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE 1 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE 3 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE 131072 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE \ + (524288 - MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE - \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE) + +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_IGMP_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MLD_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_QINQ_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_INGRESS_TABLE_UNTAGGED_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE == 1048576); + +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE 131072 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE (262144 - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO \ + MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_FROM +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_MISS_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 524288); + +#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0 + +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE 1 +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE 1 +#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE 4095 +#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_SIZE MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_VLAN_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_SIZE - 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_QINQ_GRP_IDX_TO + 1) +#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM + \ + MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE - 1) + +#define MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE \ + (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO + 1) +static_assert(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE == 8192); + +enum { + MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE, + MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE, + MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE, + MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE, +}; + +enum { + MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0), + MLX5_ESW_BRIDGE_MCAST_FLAG = BIT(1), +}; + +struct mlx5_esw_bridge_fdb_key { + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +struct mlx5_esw_bridge_mdb_key { + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +enum { + MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), + MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), + MLX5_ESW_BRIDGE_FLAG_DELETED = BIT(2), +}; + +enum { + MLX5_ESW_BRIDGE_PORT_FLAG_PEER = BIT(0), +}; + +struct mlx5_esw_bridge_fdb_entry { + struct mlx5_esw_bridge_fdb_key key; + struct rhash_head ht_node; + struct net_device *dev; + struct list_head list; + struct list_head vlan_list; + u16 vport_num; + u16 esw_owner_vhca_id; + u16 flags; + + struct mlx5_flow_handle *ingress_handle; + struct mlx5_fc *ingress_counter; + unsigned long lastuse; + struct mlx5_flow_handle *egress_handle; + struct mlx5_flow_handle *filter_handle; +}; + +struct mlx5_esw_bridge_mdb_entry { + struct mlx5_esw_bridge_mdb_key key; + struct rhash_head ht_node; + struct list_head list; + struct xarray ports; + int num_ports; + + struct mlx5_flow_handle *egress_handle; +}; + +struct mlx5_esw_bridge_vlan { + u16 vid; + u16 flags; + struct list_head fdb_list; + struct mlx5_pkt_reformat *pkt_reformat_push; + struct mlx5_pkt_reformat *pkt_reformat_pop; + struct mlx5_modify_hdr *pkt_mod_hdr_push_mark; + struct mlx5_flow_handle *mcast_handle; +}; + +struct mlx5_esw_bridge_port { + u16 vport_num; + u16 esw_owner_vhca_id; + u16 flags; + struct mlx5_esw_bridge *bridge; + struct xarray vlans; + struct { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *filter_fg; + struct mlx5_flow_group *vlan_fg; + struct mlx5_flow_group *qinq_fg; + struct mlx5_flow_group *fwd_fg; + + struct mlx5_flow_handle *filter_handle; + struct mlx5_flow_handle *fwd_handle; + } mcast; +}; + +struct mlx5_esw_bridge { + int ifindex; + int refcnt; + struct list_head list; + struct mlx5_esw_bridge_offloads *br_offloads; + struct dentry *debugfs_dir; + + struct list_head fdb_list; + struct rhashtable fdb_ht; + + struct list_head mdb_list; + struct rhashtable mdb_ht; + + struct mlx5_flow_table *egress_ft; + struct mlx5_flow_group *egress_vlan_fg; + struct mlx5_flow_group *egress_qinq_fg; + struct mlx5_flow_group *egress_mac_fg; + struct mlx5_flow_group *egress_miss_fg; + struct mlx5_pkt_reformat *egress_miss_pkt_reformat; + struct mlx5_flow_handle *egress_miss_handle; + unsigned long ageing_time; + u32 flags; + u16 vlan_proto; +}; + +struct mlx5_flow_table *mlx5_esw_bridge_table_create(int max_fte, u32 level, + struct mlx5_eswitch *esw); +unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port); + +int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port); +void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port); +int mlx5_esw_bridge_vlan_mcast_init(u16 vlan_proto, struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan); +void mlx5_esw_bridge_vlan_mcast_cleanup(struct mlx5_esw_bridge_vlan *vlan); + +int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge); + +int mlx5_esw_bridge_mdb_init(struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_mdb_cleanup(struct mlx5_esw_bridge *bridge); +int mlx5_esw_bridge_port_mdb_attach(struct net_device *dev, struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid); +void mlx5_esw_bridge_port_mdb_detach(struct net_device *dev, struct mlx5_esw_bridge_port *port, + const unsigned char *addr, u16 vid); +void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge_vlan *vlan); +void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge); + +void mlx5_esw_bridge_debugfs_offloads_init(struct mlx5_esw_bridge_offloads *br_offloads); +void mlx5_esw_bridge_debugfs_offloads_cleanup(struct mlx5_esw_bridge_offloads *br_offloads); +void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_debugfs_cleanup(struct mlx5_esw_bridge *bridge); + +#endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c new file mode 100644 index 0000000000..d8e739cbcb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include +#include "eswitch.h" + +static void +mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) +{ + u64 parent_id; + + parent_id = mlx5_query_nic_system_image_guid(dev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); +} + +static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num) +{ + return (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_core_is_ec_vf_vport(esw->dev, vport_num); +} + +static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch *esw, + u16 vport_num, + struct devlink_port *dl_port) +{ + struct mlx5_core_dev *dev = esw->dev; + struct netdev_phys_item_id ppid = {}; + u32 controller_num = 0; + bool external; + u16 pfnum; + + mlx5_esw_get_port_parent_id(dev, &ppid); + pfnum = mlx5_get_dev_index(dev); + external = mlx5_core_is_ecpf_esw_manager(dev); + if (external) + controller_num = dev->priv.eswitch->offloads.host_number + 1; + + if (vport_num == MLX5_VPORT_PF) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external); + } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, + vport_num - 1, external); + } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum, + vport_num - 1, false); + } +} + +int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_devlink_port *dl_port; + u16 vport_num = vport->vport; + + if (!mlx5_esw_devlink_port_supported(esw, vport_num)) + return 0; + + dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL); + if (!dl_port) + return -ENOMEM; + + mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(esw, vport_num, + &dl_port->dl_port); + + vport->dl_port = dl_port; + mlx5_devlink_port_init(dl_port, vport); + return 0; +} + +void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->dl_port) + return; + + kfree(vport->dl_port); + vport->dl_port = NULL; +} + +static const struct devlink_port_ops mlx5_esw_pf_vf_dl_port_ops = { + .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, + .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, + .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, + .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get, + .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, +#ifdef CONFIG_XFRM_OFFLOAD + .port_fn_ipsec_crypto_get = mlx5_devlink_port_fn_ipsec_crypto_get, + .port_fn_ipsec_crypto_set = mlx5_devlink_port_fn_ipsec_crypto_set, + .port_fn_ipsec_packet_get = mlx5_devlink_port_fn_ipsec_packet_get, + .port_fn_ipsec_packet_set = mlx5_devlink_port_fn_ipsec_packet_set, +#endif /* CONFIG_XFRM_OFFLOAD */ +}; + +static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct mlx5_eswitch *esw, + struct devlink_port *dl_port, + u32 controller, u32 sfnum) +{ + struct mlx5_core_dev *dev = esw->dev; + struct netdev_phys_item_id ppid = {}; + u16 pfnum; + + pfnum = mlx5_get_dev_index(dev); + mlx5_esw_get_port_parent_id(dev, &ppid); + memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller); +} + +int mlx5_esw_offloads_sf_devlink_port_init(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_devlink_port *dl_port, + u32 controller, u32 sfnum) +{ + mlx5_esw_offloads_sf_devlink_port_attrs_set(esw, &dl_port->dl_port, controller, sfnum); + + vport->dl_port = dl_port; + mlx5_devlink_port_init(dl_port, vport); + return 0; +} + +void mlx5_esw_offloads_sf_devlink_port_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + vport->dl_port = NULL; +} + +static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { +#ifdef CONFIG_MLX5_SF_MANAGER + .port_del = mlx5_devlink_sf_port_del, +#endif + .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, + .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, + .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, +#ifdef CONFIG_MLX5_SF_MANAGER + .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, + .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, +#endif +}; + +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + struct mlx5_core_dev *dev = esw->dev; + const struct devlink_port_ops *ops; + struct mlx5_devlink_port *dl_port; + u16 vport_num = vport->vport; + unsigned int dl_port_index; + struct devlink *devlink; + int err; + + dl_port = vport->dl_port; + if (!dl_port) + return 0; + + if (mlx5_esw_is_sf_vport(esw, vport_num)) + ops = &mlx5_esw_dl_sf_port_ops; + else if (mlx5_eswitch_is_pf_vf_vport(esw, vport_num)) + ops = &mlx5_esw_pf_vf_dl_port_ops; + else + ops = NULL; + + devlink = priv_to_devlink(dev); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); + err = devl_port_register_with_ops(devlink, &dl_port->dl_port, dl_port_index, ops); + if (err) + return err; + + err = devl_rate_leaf_create(&dl_port->dl_port, vport, NULL); + if (err) + goto rate_err; + + return 0; + +rate_err: + devl_port_unregister(&dl_port->dl_port); + return err; +} + +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + struct mlx5_devlink_port *dl_port; + + if (!vport->dl_port) + return; + dl_port = vport->dl_port; + + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devl_rate_leaf_destroy(&dl_port->dl_port); + + devl_port_unregister(&dl_port->dl_port); +} + +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + return IS_ERR(vport) ? ERR_CAST(vport) : &vport->dl_port->dl_port; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h new file mode 100644 index 0000000000..1808da2140 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_ESW_BRIDGE_TRACEPOINT_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_ESW_BRIDGE_TRACEPOINT_ + +#include +#include "../bridge_priv.h" + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb), + TP_STRUCT__entry( + __array(char, dev_name, IFNAMSIZ) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(u16, flags) + __field(unsigned int, used) + ), + TP_fast_assign( + strscpy(__entry->dev_name, + netdev_name(fdb->dev), + IFNAMSIZ); + memcpy(__entry->addr, fdb->key.addr, ETH_ALEN); + __entry->vid = fdb->key.vid; + __entry->flags = fdb->flags; + __entry->used = jiffies_to_msecs(jiffies - fdb->lastuse) + ), + TP_printk("net_device=%s addr=%pM vid=%hu flags=%hx used=%u", + __entry->dev_name, + __entry->addr, + __entry->vid, + __entry->flags, + __entry->used / 1000) + ); + +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_init, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_refresh, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_vlan_template, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan), + TP_STRUCT__entry( + __field(u16, vid) + __field(u16, flags) + ), + TP_fast_assign( + __entry->vid = vlan->vid; + __entry->flags = vlan->flags; + ), + TP_printk("vid=%hu flags=%hx", + __entry->vid, + __entry->flags) + ); + +DEFINE_EVENT(mlx5_esw_bridge_vlan_template, + mlx5_esw_bridge_vlan_create, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan) + ); +DEFINE_EVENT(mlx5_esw_bridge_vlan_template, + mlx5_esw_bridge_vlan_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port), + TP_STRUCT__entry( + __field(u16, vport_num) + __field(u16, esw_owner_vhca_id) + __field(u16, flags) + ), + TP_fast_assign( + __entry->vport_num = port->vport_num; + __entry->esw_owner_vhca_id = port->esw_owner_vhca_id; + __entry->flags = port->flags; + ), + TP_printk("vport_num=%hu esw_owner_vhca_id=%hu flags=%hx", + __entry->vport_num, + __entry->esw_owner_vhca_id, + __entry->flags) + ); + +DEFINE_EVENT(mlx5_esw_bridge_port_template, + mlx5_esw_bridge_vport_init, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port) + ); +DEFINE_EVENT(mlx5_esw_bridge_port_template, + mlx5_esw_bridge_vport_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_mdb_port_change_template, + TP_PROTO(const struct net_device *dev, + const struct mlx5_esw_bridge_mdb_entry *mdb), + TP_ARGS(dev, mdb), + TP_STRUCT__entry( + __array(char, dev_name, IFNAMSIZ) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(int, num_ports) + __field(bool, offloaded)), + TP_fast_assign( + strscpy(__entry->dev_name, netdev_name(dev), IFNAMSIZ); + memcpy(__entry->addr, mdb->key.addr, ETH_ALEN); + __entry->vid = mdb->key.vid; + __entry->num_ports = mdb->num_ports; + __entry->offloaded = mdb->egress_handle;), + TP_printk("net_device=%s addr=%pM vid=%u num_ports=%d offloaded=%d", + __entry->dev_name, + __entry->addr, + __entry->vid, + __entry->num_ports, + __entry->offloaded)); + +DEFINE_EVENT(mlx5_esw_bridge_mdb_port_change_template, + mlx5_esw_bridge_port_mdb_attach, + TP_PROTO(const struct net_device *dev, + const struct mlx5_esw_bridge_mdb_entry *mdb), + TP_ARGS(dev, mdb)); + +DEFINE_EVENT(mlx5_esw_bridge_mdb_port_change_template, + mlx5_esw_bridge_port_mdb_detach, + TP_PROTO(const struct net_device *dev, + const struct mlx5_esw_bridge_mdb_entry *mdb), + TP_ARGS(dev, mdb)); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH esw/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE bridge_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h new file mode 100644 index 0000000000..458baf0c64 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_ESW_TP_ + +#include +#include "eswitch.h" + +TRACE_EVENT(mlx5_esw_vport_qos_destroy, + TP_PROTO(const struct mlx5_vport *vport), + TP_ARGS(vport), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix + ) +); + +DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + __field(void *, group) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + __entry->group = vport->qos.group; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate, __entry->group + ) +); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + ), + TP_printk("(%s) group=%p tsar_ix=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix + ) +); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +TRACE_EVENT(mlx5_esw_group_qos_config, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix, u32 bw_share, u32 max_rate), + TP_ARGS(dev, group, tsar_ix, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + ), + TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate + ) +); +#endif /* _MLX5_ESW_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH esw/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE qos_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c new file mode 100644 index 0000000000..9959e9fd15 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" +#include "fs_core.h" +#include "esw/indir_table.h" +#include "lib/fs_chains.h" +#include "en/mod_hdr.h" + +#define MLX5_ESW_INDIR_TABLE_SIZE 2 +#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 2) +#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1) + +struct mlx5_esw_indir_table_rule { + struct mlx5_flow_handle *handle; + struct mlx5_modify_hdr *mh; + refcount_t refcnt; +}; + +struct mlx5_esw_indir_table_entry { + struct hlist_node hlist; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *recirc_grp; + struct mlx5_flow_group *fwd_grp; + struct mlx5_flow_handle *fwd_rule; + struct mlx5_esw_indir_table_rule *recirc_rule; + int fwd_ref; + + u16 vport; +}; + +struct mlx5_esw_indir_table { + struct mutex lock; /* protects table */ + DECLARE_HASHTABLE(table, 8); +}; + +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void) +{ + struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL); + + if (!indir) + return ERR_PTR(-ENOMEM); + + mutex_init(&indir->lock); + hash_init(indir->table); + return indir; +} + +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) +{ + mutex_destroy(&indir->lock); + kvfree(indir); +} + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + bool vf_sf_vport; + + vf_sf_vport = mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_esw_is_sf_vport(esw, vport_num); + + /* Use indirect table for all IP traffic from UL to VF with vport + * destination when source rewrite flag is set. + */ + return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK && + vf_sf_vport && + esw->dev == dest_mdev && + attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE; +} + +u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0; +} + +static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_esw_indir_table_entry *e) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_esw_indir_table_rule *rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *handle; + int err = 0; + u32 data; + + if (e->recirc_rule) { + refcount_inc(&e->recirc_rule->refcnt); + return 0; + } + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return -ENOMEM; + + /* Modify flow source to recirculate packet */ + data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport); + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + VPORT_TO_REG, data); + if (err) + goto err_mod_hdr_regc0; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT); + if (err) + goto err_mod_hdr_regc1; + + flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, mod_acts.actions); + if (IS_ERR(flow_act.modify_hdr)) { + err = PTR_ERR(flow_act.modify_hdr); + goto err_mod_hdr_alloc; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + flow_act.fg = e->recirc_grp; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(dest.ft)) { + err = PTR_ERR(dest.ft); + goto err_table; + } + handle = mlx5_add_flow_rules(e->ft, NULL, &flow_act, &dest, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto err_handle; + } + + mlx5e_mod_hdr_dealloc(&mod_acts); + rule->handle = handle; + rule->mh = flow_act.modify_hdr; + refcount_set(&rule->refcnt, 1); + e->recirc_rule = rule; + return 0; + +err_handle: + mlx5_chains_put_table(chains, 0, 1, 0); +err_table: + mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr); +err_mod_hdr_alloc: +err_mod_hdr_regc1: + mlx5e_mod_hdr_dealloc(&mod_acts); +err_mod_hdr_regc0: + kfree(rule); + return err; +} + +static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw, + struct mlx5_esw_indir_table_entry *e) +{ + struct mlx5_esw_indir_table_rule *rule = e->recirc_rule; + struct mlx5_fs_chains *chains = esw_chains(esw); + + if (!rule) + return; + + if (!refcount_dec_and_test(&rule->refcnt)) + return; + + mlx5_del_flow_rules(rule->handle); + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_modify_header_dealloc(esw->dev, rule->mh); + kfree(rule); + e->recirc_rule = NULL; +} + +static int mlx5_create_indir_recirc_group(struct mlx5_esw_indir_table_entry *e) +{ + int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX); + e->recirc_grp = mlx5_create_flow_group(e->ft, in); + if (IS_ERR(e->recirc_grp)) + err = PTR_ERR(e->recirc_grp); + + kvfree(in); + return err; +} + +static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, + struct mlx5_esw_indir_table_entry *e) +{ + int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + /* Hold one entry */ + MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); + e->fwd_grp = mlx5_create_flow_group(e->ft, in); + if (IS_ERR(e->fwd_grp)) { + err = PTR_ERR(e->fwd_grp); + goto err_out; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.fg = e->fwd_grp; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = e->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + e->fwd_rule = mlx5_add_flow_rules(e->ft, NULL, &flow_act, &dest, 1); + if (IS_ERR(e->fwd_rule)) { + mlx5_destroy_flow_group(e->fwd_grp); + err = PTR_ERR(e->fwd_rule); + } + +err_out: + kvfree(in); + return err; +} + +static struct mlx5_esw_indir_table_entry * +mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + struct mlx5_esw_indir_table_entry *e; + struct mlx5_flow_table *ft; + int err = 0; + + root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) + return ERR_PTR(-ENOENT); + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE; + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.level = 1; + + ft = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto tbl_err; + } + e->ft = ft; + e->vport = vport; + e->fwd_ref = !decap; + + err = mlx5_create_indir_recirc_group(e); + if (err) + goto recirc_grp_err; + + if (decap) { + err = mlx5_esw_indir_table_rule_get(esw, attr, e); + if (err) + goto recirc_rule_err; + } + + err = mlx5_create_indir_fwd_group(esw, e); + if (err) + goto fwd_grp_err; + + hash_add(esw->fdb_table.offloads.indir->table, &e->hlist, + vport << 16); + + return e; + +fwd_grp_err: + if (decap) + mlx5_esw_indir_table_rule_put(esw, e); +recirc_rule_err: + mlx5_destroy_flow_group(e->recirc_grp); +recirc_grp_err: + mlx5_destroy_flow_table(e->ft); +tbl_err: + kfree(e); + return ERR_PTR(err); +} + +static struct mlx5_esw_indir_table_entry * +mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport) +{ + struct mlx5_esw_indir_table_entry *e; + u32 key = vport << 16; + + hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key) + if (e->vport == vport) + return e; + + return NULL; +} + +struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ + struct mlx5_esw_indir_table_entry *e; + int err; + + mutex_lock(&esw->fdb_table.offloads.indir->lock); + e = mlx5_esw_indir_table_entry_lookup(esw, vport); + if (e) { + if (!decap) { + e->fwd_ref++; + } else { + err = mlx5_esw_indir_table_rule_get(esw, attr, e); + if (err) + goto out_err; + } + } else { + e = mlx5_esw_indir_table_entry_create(esw, attr, vport, decap); + if (IS_ERR(e)) { + err = PTR_ERR(e); + esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err); + goto out_err; + } + } + mutex_unlock(&esw->fdb_table.offloads.indir->lock); + return e->ft; + +out_err: + mutex_unlock(&esw->fdb_table.offloads.indir->lock); + return ERR_PTR(err); +} + +void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + u16 vport, bool decap) +{ + struct mlx5_esw_indir_table_entry *e; + + mutex_lock(&esw->fdb_table.offloads.indir->lock); + e = mlx5_esw_indir_table_entry_lookup(esw, vport); + if (!e) + goto out; + + if (!decap) + e->fwd_ref--; + else + mlx5_esw_indir_table_rule_put(esw, e); + + if (e->fwd_ref || e->recirc_rule) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_group(e->recirc_grp); + mlx5_del_flow_rules(e->fwd_rule); + mlx5_destroy_flow_group(e->fwd_grp); + mlx5_destroy_flow_table(e->ft); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.indir->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h new file mode 100644 index 0000000000..036f5b3a34 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_ESW_FT_H__ +#define __MLX5_ESW_FT_H__ + +#ifdef CONFIG_MLX5_CLS_ACT + +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void); +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir); + +struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap); +void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + u16 vport, bool decap); + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev); + +u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr); + +#else +/* indir API stubs */ +static inline struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void) +{ + return NULL; +} + +static inline void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) +{ +} + +static inline struct mlx5_flow_table * +mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + u16 vport, bool decap) +{ +} + +static inline bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev) +{ + return false; +} + +static inline u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) +{ + return 0; +} +#endif + +#endif /* __MLX5_ESW_FT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c new file mode 100644 index 0000000000..da10e04777 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c @@ -0,0 +1,369 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include +#include "mlx5_core.h" +#include "eswitch.h" + +static int esw_ipsec_vf_query_generic(struct mlx5_core_dev *dev, u16 vport_num, bool *result) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + int err; + + if (!MLX5_CAP_GEN(dev, vhca_resource_manager)) + return -EOPNOTSUPP; + + if (!mlx5_esw_ipsec_vf_offload_supported(dev)) { + *result = false; + return 0; + } + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + err = mlx5_vport_get_other_func_general_cap(dev, vport_num, query_cap); + if (err) + goto free; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + *result = MLX5_GET(cmd_hca_cap, hca_cap, ipsec_offload); +free: + kvfree(query_cap); + return err; +} + +enum esw_vport_ipsec_offload { + MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD, + MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD, +}; + +int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev, struct mlx5_vport *vport) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + bool ipsec_enabled; + int err; + + /* Querying IPsec caps only makes sense when generic ipsec_offload + * HCA cap is enabled + */ + err = esw_ipsec_vf_query_generic(dev, vport->vport, &ipsec_enabled); + if (err) + return err; + + if (!ipsec_enabled) { + vport->info.ipsec_crypto_enabled = false; + vport->info.ipsec_packet_enabled = false; + return 0; + } + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(dev, vport->vport, query_cap, MLX5_CAP_IPSEC); + if (err) + goto free; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + vport->info.ipsec_crypto_enabled = + MLX5_GET(ipsec_cap, hca_cap, ipsec_crypto_offload); + vport->info.ipsec_packet_enabled = + MLX5_GET(ipsec_cap, hca_cap, ipsec_full_offload); +free: + kvfree(query_cap); + return err; +} + +static int esw_ipsec_vf_set_generic(struct mlx5_core_dev *dev, u16 vport_num, bool ipsec_ofld) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *hca_cap, *query_cap, *cap; + int ret; + + if (!MLX5_CAP_GEN(dev, vhca_resource_manager)) + return -EOPNOTSUPP; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto free; + } + + ret = mlx5_vport_get_other_func_general_cap(dev, vport_num, query_cap); + if (ret) + goto free; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + MLX5_SET(cmd_hca_cap, cap, ipsec_offload, ipsec_ofld); + + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport_num); + + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); +free: + kvfree(hca_cap); + kvfree(query_cap); + return ret; +} + +static int esw_ipsec_vf_set_bytype(struct mlx5_core_dev *dev, struct mlx5_vport *vport, + bool enable, enum esw_vport_ipsec_offload type) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *hca_cap, *query_cap, *cap; + int ret; + + if (!MLX5_CAP_GEN(dev, vhca_resource_manager)) + return -EOPNOTSUPP; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto free; + } + + ret = mlx5_vport_get_other_func_cap(dev, vport->vport, query_cap, MLX5_CAP_IPSEC); + if (ret) + goto free; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + + switch (type) { + case MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD: + MLX5_SET(ipsec_cap, cap, ipsec_crypto_offload, enable); + break; + case MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD: + MLX5_SET(ipsec_cap, cap, ipsec_full_offload, enable); + break; + default: + ret = -EOPNOTSUPP; + goto free; + } + + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport->vport); + + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_IPSEC << 1); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); +free: + kvfree(hca_cap); + kvfree(query_cap); + return ret; +} + +static int esw_ipsec_vf_crypto_aux_caps_set(struct mlx5_core_dev *dev, u16 vport_num, bool enable) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + struct mlx5_eswitch *esw = dev->priv.eswitch; + void *hca_cap, *query_cap, *cap; + int ret; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto free; + } + + ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_ETHERNET_OFFLOADS); + if (ret) + goto free; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + MLX5_SET(per_protocol_networking_offload_caps, cap, insert_trailer, enable); + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport_num); + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_ETHERNET_OFFLOADS << 1); + ret = mlx5_cmd_exec_in(esw->dev, set_hca_cap, hca_cap); +free: + kvfree(hca_cap); + kvfree(query_cap); + return ret; +} + +static int esw_ipsec_vf_offload_set_bytype(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable, enum esw_vport_ipsec_offload type) +{ + struct mlx5_core_dev *dev = esw->dev; + int err; + + if (vport->vport == MLX5_VPORT_PF) + return -EOPNOTSUPP; + + if (type == MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD) { + err = esw_ipsec_vf_crypto_aux_caps_set(dev, vport->vport, enable); + if (err) + return err; + } + + if (enable) { + err = esw_ipsec_vf_set_generic(dev, vport->vport, enable); + if (err) + return err; + err = esw_ipsec_vf_set_bytype(dev, vport, enable, type); + if (err) + return err; + } else { + err = esw_ipsec_vf_set_bytype(dev, vport, enable, type); + if (err) + return err; + err = mlx5_esw_ipsec_vf_offload_get(dev, vport); + if (err) + return err; + + /* The generic ipsec_offload cap can be disabled only if both + * ipsec_crypto_offload and ipsec_full_offload aren't enabled. + */ + if (!vport->info.ipsec_crypto_enabled && + !vport->info.ipsec_packet_enabled) { + err = esw_ipsec_vf_set_generic(dev, vport->vport, enable); + if (err) + return err; + } + } + + switch (type) { + case MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD: + vport->info.ipsec_crypto_enabled = enable; + break; + case MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD: + vport->info.ipsec_packet_enabled = enable; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_ipsec_offload_supported(struct mlx5_core_dev *dev, u16 vport_num) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + int ret; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_GENERAL); + if (ret) + goto free; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + if (!MLX5_GET(cmd_hca_cap, hca_cap, log_max_dek)) + ret = -EOPNOTSUPP; +free: + kvfree(query_cap); + return ret; +} + +bool mlx5_esw_ipsec_vf_offload_supported(struct mlx5_core_dev *dev) +{ + /* Old firmware doesn't support ipsec_offload capability for VFs. This + * can be detected by checking reformat_add_esp_trasport capability - + * when this cap isn't supported it means firmware cannot be trusted + * about what it reports for ipsec_offload cap. + */ + return MLX5_CAP_FLOWTABLE_NIC_TX(dev, reformat_add_esp_trasport); +} + +int mlx5_esw_ipsec_vf_crypto_offload_supported(struct mlx5_core_dev *dev, + u16 vport_num) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + int err; + + if (!mlx5_esw_ipsec_vf_offload_supported(dev)) + return -EOPNOTSUPP; + + err = esw_ipsec_offload_supported(dev, vport_num); + if (err) + return err; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_ETHERNET_OFFLOADS); + if (err) + goto free; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + if (!MLX5_GET(per_protocol_networking_offload_caps, hca_cap, swp)) + goto free; + +free: + kvfree(query_cap); + return err; +} + +int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, + u16 vport_num) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *hca_cap, *query_cap; + int ret; + + if (!mlx5_esw_ipsec_vf_offload_supported(dev)) + return -EOPNOTSUPP; + + ret = esw_ipsec_offload_supported(dev, vport_num); + if (ret) + return ret; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + if (!query_cap) + return -ENOMEM; + + ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_FLOW_TABLE); + if (ret) + goto out; + + hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability); + if (!MLX5_GET(flow_table_nic_cap, hca_cap, flow_table_properties_nic_receive.decap)) { + ret = -EOPNOTSUPP; + goto out; + } + +out: + kvfree(query_cap); + return ret; +} + +int mlx5_esw_ipsec_vf_crypto_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable) +{ + return esw_ipsec_vf_offload_set_bytype(esw, vport, enable, + MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD); +} + +int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable) +{ + return esw_ipsec_vf_offload_set_bytype(esw, vport, enable, + MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c new file mode 100644 index 0000000000..d5d33c3b3a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "fs_core.h" +#include "eswitch.h" +#include "en_accel/ipsec.h" +#include "esw/ipsec_fs.h" +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +#include "en/tc_priv.h" +#endif + +enum { + MLX5_ESW_IPSEC_RX_POL_FT_LEVEL, + MLX5_ESW_IPSEC_RX_ESP_FT_LEVEL, + MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL, +}; + +enum { + MLX5_ESW_IPSEC_TX_POL_FT_LEVEL, + MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL, + MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL, +}; + +void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr) +{ + attr->prio = FDB_CRYPTO_INGRESS; + attr->pol_level = MLX5_ESW_IPSEC_RX_POL_FT_LEVEL; + attr->sa_level = MLX5_ESW_IPSEC_RX_ESP_FT_LEVEL; + attr->status_level = MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL; + attr->chains_ns = MLX5_FLOW_NAMESPACE_FDB; +} + +int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5_flow_destination *dest) +{ + dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest->ft = mlx5_chains_get_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); + + return 0; +} + +int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_act *flow_act) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_modify_hdr *modify_hdr; + u32 mapped_id; + int err; + + err = xa_alloc_bh(&ipsec->rx_esw->ipsec_obj_id_map, &mapped_id, + xa_mk_value(sa_entry->ipsec_obj_id), + XA_LIMIT(1, ESW_IPSEC_RX_MAPPED_ID_MASK), 0); + if (err) + return err; + + /* reuse tunnel bits for ipsec, + * tun_id is always 0 and tun_opts is mapped to ipsec_obj_id. + */ + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(set_action_in, action, offset, ESW_ZONE_ID_BITS); + MLX5_SET(set_action_in, action, length, + ESW_TUN_ID_BITS + ESW_TUN_OPTS_BITS); + MLX5_SET(set_action_in, action, data, mapped_id); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + goto err_header_alloc; + } + + sa_entry->rx_mapped_id = mapped_id; + flow_act->modify_hdr = modify_hdr; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return 0; + +err_header_alloc: + xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, mapped_id); + return err; +} + +void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + + if (sa_entry->rx_mapped_id) + xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, + sa_entry->rx_mapped_id); +} + +int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, + u32 *ipsec_obj_id) +{ + struct mlx5e_ipsec *ipsec = priv->ipsec; + void *val; + + val = xa_load(&ipsec->rx_esw->ipsec_obj_id_map, id); + if (!val) + return -ENOENT; + + *ipsec_obj_id = xa_to_value(val); + + return 0; +} + +void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx_create_attr *attr) +{ + attr->prio = FDB_CRYPTO_EGRESS; + attr->pol_level = MLX5_ESW_IPSEC_TX_POL_FT_LEVEL; + attr->sa_level = MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL; + attr->cnt_level = MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL; + attr->chains_ns = MLX5_FLOW_NAMESPACE_FDB; +} + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +static int mlx5_esw_ipsec_modify_flow_dests(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; + int err; + + attr = flow->attr; + esw_attr = attr->esw_attr; + if (esw_attr->out_count - esw_attr->split_count > 1) + return 0; + + err = mlx5_eswitch_restore_ipsec_rule(esw, flow->rule[0], esw_attr, + esw_attr->out_count - 1); + + return err; +} +#endif + +void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) +{ +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep; + struct mlx5e_rep_priv *rpriv; + struct rhashtable_iter iter; + struct mlx5e_tc_flow *flow; + unsigned long i; + int err; + + xa_for_each(&esw->offloads.vport_reps, i, rep) { + rpriv = rep->rep_data[REP_ETH].priv; + if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) + continue; + + rhashtable_walk_enter(&rpriv->tc_ht, &iter); + rhashtable_walk_start(&iter); + while ((flow = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(flow)) + continue; + + err = mlx5_esw_ipsec_modify_flow_dests(esw, flow); + if (err) + mlx5_core_warn_once(mdev, + "Failed to modify flow dests for IPsec"); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + } +#endif +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h new file mode 100644 index 0000000000..ac9c65b891 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_ESW_IPSEC_FS_H__ +#define __MLX5_ESW_IPSEC_FS_H__ + +struct mlx5e_ipsec; +struct mlx5e_ipsec_sa_entry; + +#ifdef CONFIG_MLX5_ESWITCH +void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr); +int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5_flow_destination *dest); +int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_act *flow_act); +void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry); +int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, + u32 *ipsec_obj_id); +void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx_create_attr *attr); +void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev); +#else +static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx_create_attr *attr) {} + +static inline int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5_flow_destination *dest) +{ + return -EINVAL; +} + +static inline int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_act *flow_act) +{ + return -EINVAL; +} + +static inline void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) {} + +static inline int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, + u32 *ipsec_obj_id) +{ + return -EINVAL; +} + +static inline void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_tx_create_attr *attr) {} + +static inline void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) {} +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_ESW_IPSEC_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c new file mode 100644 index 0000000000..255bc8b749 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -0,0 +1,527 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#include +#include +#include +#include +#include +#include "esw/acl/lgcy.h" +#include "esw/legacy.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "fs_core.h" +#include "fs_ft_pool.h" +#include "esw/qos.h" + +enum { + LEGACY_VEPA_PRIO = 0, + LEGACY_FDB_PRIO, +}; + +static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + int err; + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* num FTE 2, num FG 2 */ + ft_attr.prio = LEGACY_VEPA_PRIO; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); + return err; + } + esw->fdb_table.legacy.vepa_fdb = fdb; + + return 0; +} + +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy FDB Table\n"); + if (!esw->fdb_table.legacy.fdb) + return; + + if (esw->fdb_table.legacy.promisc_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + if (esw->fdb_table.legacy.allmulti_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + if (esw->fdb_table.legacy.addr_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); + + esw->fdb_table.legacy.fdb = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; + atomic64_set(&esw->user_count, 0); +} + +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; + u8 *dmac; + int err = 0; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft_attr.max_fte = POOL_NEXT_SIZE; + ft_attr.prio = LEGACY_FDB_PRIO; + fdb = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + esw->fdb_table.legacy.fdb = fdb; + table_size = fdb->max_fte; + + /* Addresses group : Full match unicast/multicast addresses */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + /* Preserve 2 entries for allmulti and promisc rules*/ + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); + eth_broadcast_addr(dmac); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.addr_grp = g; + + /* Allmulti group : One rule that forwards any mcast traffic */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); + eth_zero_addr(dmac); + dmac[0] = 0x01; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.allmulti_grp = g; + + /* Promiscuous group : + * One rule that forward all unmatched traffic from previous groups + */ + eth_zero_addr(dmac); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.promisc_grp = g; + +out: + if (err) + esw_destroy_legacy_fdb_table(esw); + + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy VEPA Table\n"); + if (!esw->fdb_table.legacy.vepa_fdb) + return; + + mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb); + esw->fdb_table.legacy.vepa_fdb = NULL; +} + +static int esw_create_legacy_table(struct mlx5_eswitch *esw) +{ + int err; + + memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); + atomic64_set(&esw->user_count, 0); + + err = esw_create_legacy_vepa_table(esw); + if (err) + return err; + + err = esw_create_legacy_fdb_table(esw); + if (err) + esw_destroy_legacy_vepa_table(esw); + + return err; +} + +static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.legacy.vepa_uplink_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule); + + if (esw->fdb_table.legacy.vepa_star_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule); + + esw->fdb_table.legacy.vepa_uplink_rule = NULL; + esw->fdb_table.legacy.vepa_star_rule = NULL; +} + +static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) +{ + esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_vepa_table(esw); +} + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +int esw_legacy_enable(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + int ret; + + ret = esw_create_legacy_table(esw); + if (ret) + return ret; + + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); + if (ret) + esw_destroy_legacy_table(esw); + return ret; +} + +void esw_legacy_disable(struct mlx5_eswitch *esw) +{ + struct esw_mc_addr *mc_promisc; + + mlx5_eswitch_disable_pf_vf_vports(esw); + + mc_promisc = &esw->mc_promisc; + if (mc_promisc->uplink_rule) + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_legacy_table(esw); +} + +static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, + u8 setting) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + if (!setting) { + esw_cleanup_vepa_rules(esw); + return 0; + } + + if (esw->fdb_table.legacy.vepa_uplink_rule) + return 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Uplink rule forward uplink traffic to FDB */ + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->fdb_table.legacy.fdb; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } + esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; + + /* Star rule to forward all traffic to uplink vport */ + memset(&dest, 0, sizeof(dest)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = MLX5_VPORT_UPLINK; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } + esw->fdb_table.legacy.vepa_star_rule = flow_rule; + +out: + kvfree(spec); + if (err) + esw_cleanup_vepa_rules(esw); + return err; +} + +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) +{ + int err = 0; + + if (!esw) + return -EOPNOTSUPP; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto out; + } + + err = _mlx5_eswitch_set_vepa_locked(esw, setting); + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) +{ + if (!esw) + return -EOPNOTSUPP; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + if (esw->mode != MLX5_ESWITCH_LEGACY) + return -EOPNOTSUPP; + + *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0; + return 0; +} + +int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int ret; + + /* Only non manager vports need ACL in legacy mode */ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return 0; + + ret = esw_acl_ingress_lgcy_setup(esw, vport); + if (ret) + goto ingress_err; + + ret = esw_acl_egress_lgcy_setup(esw, vport); + if (ret) + goto egress_err; + + return 0; + +egress_err: + esw_acl_ingress_lgcy_cleanup(esw, vport); +ingress_err: + return ret; +} + +void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return; + + esw_acl_egress_lgcy_cleanup(esw, vport); + esw_acl_ingress_lgcy_cleanup(esw, vport); +} + +int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev, + struct mlx5_vport *vport, + struct mlx5_vport_drop_stats *stats) +{ + u64 rx_discard_vport_down, tx_discard_vport_down; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u64 bytes = 0; + int err = 0; + + if (esw->mode != MLX5_ESWITCH_LEGACY) + return 0; + + mutex_lock(&esw->state_lock); + if (!vport->enabled) + goto unlock; + + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) + mlx5_fc_query(dev, vport->egress.legacy.drop_counter, + &stats->rx_dropped, &bytes); + + if (vport->ingress.legacy.drop_counter) + mlx5_fc_query(dev, vport->ingress.legacy.drop_counter, + &stats->tx_dropped, &bytes); + + if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && + !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + goto unlock; + + err = mlx5_query_vport_down_stats(dev, vport->vport, 1, + &rx_discard_vport_down, + &tx_discard_vport_down); + if (err) + goto unlock; + + if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) + stats->rx_dropped += rx_discard_vport_down; + if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + stats->tx_dropped += tx_discard_vport_down; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return vlan ? -EPERM : 0; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + if (!vlan) + goto unlock; /* compatibility with libvirt */ + + err = -EOPNOTSUPP; + goto unlock; + } + + err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + u16 vport, bool spoofchk) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool pschk; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk in set while MAC is invalid, vport(%d)\n", + evport->vport); + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) + err = esw_acl_ingress_lgcy_setup(esw, evport); + if (err) + evport->info.spoofchk = pschk; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + u16 vport, bool setting) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + evport->info.trusted = setting; + if (evport->enabled) + esw_vport_change_handle_locked(evport); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, + u32 max_rate, u32 min_rate) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate); + mutex_unlock(&esw->state_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h new file mode 100644 index 0000000000..e0820bb72b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#ifndef __MLX5_ESW_LEGACY_H__ +#define __MLX5_ESW_LEGACY_H__ + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +struct mlx5_eswitch; + +int esw_legacy_enable(struct mlx5_eswitch *esw); +void esw_legacy_disable(struct mlx5_eswitch *esw); + +int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev, + struct mlx5_vport *vport, + struct mlx5_vport_drop_stats *stats); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c new file mode 100644 index 0000000000..1887a24ee4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -0,0 +1,943 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "eswitch.h" +#include "esw/qos.h" +#include "en/port.h" +#define CREATE_TRACE_POINTS +#include "diag/qos_tracepoint.h" + +/* Minimum supported BW share value by the HW is 1 Mbit/sec */ +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) + +struct mlx5_esw_rate_group { + u32 tsar_ix; + u32 max_rate; + u32 min_rate; + u32 bw_share; + struct list_head list; +}; + +static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, + u32 tsar_ix, u32 max_rate, u32 bw_share) +{ + u32 bitmask = 0; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + + return mlx5_modify_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + tsar_ix, + bitmask); +} + +static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + err = esw_qos_tsar_config(dev, sched_ctx, + group->tsar_ix, + max_rate, bw_share); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); + + trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); + + return err; +} + +static int esw_qos_vport_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + if (!vport->qos.enabled) + return -EIO; + + err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix, + max_rate, bw_share); + if (err) { + esw_warn(esw->dev, + "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); + return err; + } + + trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); + + return 0; +} + +static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + bool group_level) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 max_guarantee = 0; + unsigned long i; + + if (group_level) { + struct mlx5_esw_rate_group *group; + + list_for_each_entry(group, &esw->qos.groups, list) { + if (group->min_rate < max_guarantee) + continue; + max_guarantee = group->min_rate; + } + } else { + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || + evport->qos.group != group || evport->qos.min_rate < max_guarantee) + continue; + max_guarantee = evport->qos.min_rate; + } + } + + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + + /* If vports min rate divider is 0 but their group has bw_share configured, then + * need to set bw_share for vports to minimal value. + */ + if (!group_level && !max_guarantee && group && group->bw_share) + return 1; + return 0; +} + +static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +{ + if (divider) + return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); + + return 0; +} + +static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); + struct mlx5_vport *evport; + unsigned long i; + u32 bw_share; + int err; + + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) + continue; + bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); + if (err) + return err; + + evport->qos.bw_share = bw_share; + } + + return 0; +} + +static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_esw_rate_group *group; + u32 bw_share; + int err; + + list_for_each_entry(group, &esw->qos.groups, list) { + bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); + + if (bw_share == group->bw_share) + continue; + + err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); + if (err) + return err; + + group->bw_share = bw_share; + + /* All the group's vports need to be set with default bw_share + * to enable them with QOS + */ + err = esw_qos_normalize_vports_min_rate(esw, group, extack); + + if (err) + return err; + } + + return 0; +} + +static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 min_rate, struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share, previous_min_rate; + bool min_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + if (min_rate && !min_rate_supported) + return -EOPNOTSUPP; + if (min_rate == evport->qos.min_rate) + return 0; + + previous_min_rate = evport->qos.min_rate; + evport->qos.min_rate = min_rate; + err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); + if (err) + evport->qos.min_rate = previous_min_rate; + + return err; +} + +static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 max_rate, struct netlink_ext_ack *extack) +{ + u32 act_max_rate = max_rate; + bool max_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + + if (max_rate && !max_rate_supported) + return -EOPNOTSUPP; + if (max_rate == evport->qos.max_rate) + return 0; + + /* If parent group has rate limit need to set to group + * value when new max rate is 0. + */ + if (evport->qos.group && !max_rate) + act_max_rate = evport->qos.group->max_rate; + + err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); + + if (!err) + evport->qos.max_rate = max_rate; + + return err; +} + +static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 min_rate, struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_core_dev *dev = esw->dev; + u32 previous_min_rate, divider; + int err; + + if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) + return -EOPNOTSUPP; + + if (min_rate == group->min_rate) + return 0; + + previous_min_rate = group->min_rate; + group->min_rate = min_rate; + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + group->min_rate = previous_min_rate; + NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); + + /* Attempt restoring previous configuration */ + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) + NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); + } + + return err; +} + +static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + u32 max_rate, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + if (group->max_rate == max_rate) + return 0; + + err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); + if (err) + return err; + + group->max_rate = max_rate; + + /* Any unlimited vports in the group should be set + * with the value of the group. + */ + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport->enabled || !vport->qos.enabled || + vport->qos.group != group || vport->qos.max_rate) + continue; + + err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "E-Switch vport implicit rate limit setting failed"); + } + + return err; +} + +static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; + u32 parent_tsar_ix; + void *vport_elem; + int err; + + parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + &vport->qos.esw_tsar_ix); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + return err; + } + + return 0; +} + +static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *curr_group, + struct mlx5_esw_rate_group *new_group, + struct netlink_ext_ack *extack) +{ + u32 max_rate; + int err; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); + return err; + } + + vport->qos.group = new_group; + max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; + + /* If vport is unlimited, we set the group's value. + * Therefore, if the group is limited it will apply to + * the vport as well and if not, vport will remain unlimited. + */ + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); + goto err_sched; + } + + return 0; + +err_sched: + vport->qos.group = curr_group; + max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; + if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) + esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", + vport->vport); + + return err; +} + +static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *new_group, *curr_group; + int err; + + if (!vport->enabled) + return -EINVAL; + + curr_group = vport->qos.group; + new_group = group ?: esw->qos.group0; + if (curr_group == new_group) + return 0; + + err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); + if (err) + return err; + + /* Recalculate bw share weights of old and new groups */ + if (vport->qos.bw_share || new_group->bw_share) { + esw_qos_normalize_vports_min_rate(esw, curr_group, extack); + esw_qos_normalize_vports_min_rate(esw, new_group, extack); + } + + return 0; +} + +static struct mlx5_esw_rate_group * +__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group; + u32 divider; + int err; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, + esw->qos.root_tsar_ix); + err = mlx5_create_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &group->tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); + goto err_sched_elem; + } + + list_add_tail(&group->list, &esw->qos.groups); + + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (divider) { + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + goto err_min_rate; + } + } + trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); + + return group; + +err_min_rate: + list_del(&group->list); + if (mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix)) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); +err_sched_elem: + kfree(group); + return ERR_PTR(err); +} + +static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); +static void esw_qos_put(struct mlx5_eswitch *esw); + +static struct mlx5_esw_rate_group * +esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + int err; + + if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) + return ERR_PTR(-EOPNOTSUPP); + + err = esw_qos_get(esw, extack); + if (err) + return ERR_PTR(err); + + group = __esw_qos_create_rate_group(esw, extack); + if (IS_ERR(group)) + esw_qos_put(esw); + + return group; +} + +static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 divider; + int err; + + list_del(&group->list); + + divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); + + trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + + kfree(group); + + return err; +} + +static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + int err; + + err = __esw_qos_destroy_rate_group(esw, group, extack); + esw_qos_put(esw); + + return err; +} + +static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) +{ + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_TASR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + } + return false; +} + +static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + __be32 *attr; + int err; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &esw->qos.root_tsar_ix); + if (err) { + esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); + return err; + } + + INIT_LIST_HEAD(&esw->qos.groups); + if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { + esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); + if (IS_ERR(esw->qos.group0)) { + esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", + PTR_ERR(esw->qos.group0)); + err = PTR_ERR(esw->qos.group0); + goto err_group0; + } + } + refcount_set(&esw->qos.refcnt, 1); + + return 0; + +err_group0: + if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix)) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); + + return err; +} + +static void esw_qos_destroy(struct mlx5_eswitch *esw) +{ + int err; + + if (esw->qos.group0) + __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); +} + +static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + int err = 0; + + lockdep_assert_held(&esw->state_lock); + + if (!refcount_inc_not_zero(&esw->qos.refcnt)) { + /* esw_qos_create() set refcount to 1 only on success. + * No need to decrement on failure. + */ + err = esw_qos_create(esw, extack); + } + + return err; +} + +static void esw_qos_put(struct mlx5_eswitch *esw) +{ + lockdep_assert_held(&esw->state_lock); + if (refcount_dec_and_test(&esw->qos.refcnt)) + esw_qos_destroy(esw); +} + +static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (vport->qos.enabled) + return 0; + + err = esw_qos_get(esw, extack); + if (err) + return err; + + vport->qos.group = esw->qos.group0; + + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); + if (err) + goto err_out; + + vport->qos.enabled = true; + trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); + + return 0; + +err_out: + esw_qos_put(esw); + + return err; +} + +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (!vport->qos.enabled) + return; + WARN(vport->qos.group && vport->qos.group != esw->qos.group0, + "Disabling QoS on port before detaching it from group"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + + memset(&vport->qos, 0, sizeof(vport->qos)); + trace_mlx5_esw_vport_qos_destroy(vport); + + esw_qos_put(esw); +} + +int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 min_rate) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); + if (err) + return err; + + err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); + if (!err) + err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); + + return err; +} + +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) +{ + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + u32 bitmask; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + mutex_lock(&esw->state_lock); + if (!vport->qos.enabled) { + /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ + err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); + } else { + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + err = mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + bitmask); + } + mutex_unlock(&esw->state_lock); + + return err; +} + +#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ + +/* Converts bytes per second value passed in a pointer into megabits per + * second, rewriting last. If converted rate exceed link speed or is not a + * fraction of Mbps - returns error. + */ +static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, + u64 *rate, struct netlink_ext_ack *extack) +{ + u32 link_speed_max, remainder; + u64 value; + int err; + + err = mlx5_port_max_linkspeed(mdev, &link_speed_max); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); + return err; + } + + value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder); + if (remainder) { + pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", + name, *rate); + NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); + return -EINVAL; + } + + if (value > link_speed_max) { + pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", + name, value, link_speed_max); + NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); + return -EINVAL; + } + + *rate = value; + return 0; +} + +/* Eswitch devlink rate API */ + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (err) + goto unlock; + + err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (err) + goto unlock; + + err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + struct mlx5_eswitch *esw; + int err = 0; + + esw = mlx5_devlink_eswitch_get(rate_node->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Rate node creation supported only in switchdev mode"); + err = -EOPNOTSUPP; + goto unlock; + } + + group = esw_qos_create_rate_group(esw, extack); + if (IS_ERR(group)) { + err = PTR_ERR(group); + goto unlock; + } + + *priv = group; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group = priv; + struct mlx5_eswitch *esw; + int err; + + esw = mlx5_devlink_eswitch_get(rate_node->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + mutex_lock(&esw->state_lock); + err = esw_qos_destroy_rate_group(esw, group, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + int err = 0; + + mutex_lock(&esw->state_lock); + if (!vport->qos.enabled && !group) + goto unlock; + + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (!err) + err = esw_qos_vport_update_group(esw, vport, group, extack); +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + struct mlx5_vport *vport = priv; + + if (!parent) + return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, + vport, NULL, extack); + + group = parent_priv; + return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h new file mode 100644 index 0000000000..0141e9d520 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_ESW_QOS_H__ +#define __MLX5_ESW_QOS_H__ + +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 max_rate, u32 min_rate); +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c new file mode 100644 index 0000000000..749c3957a1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021 Mellanox Technologies. + +#include "eswitch.h" + +/* This struct is used as a key to the hash table and we need it to be packed + * so hash result is consistent + */ +struct mlx5_vport_key { + u32 chain; + u16 prio; + u16 vport; + u16 vhca_id; + struct esw_vport_tbl_namespace *vport_ns; +} __packed; + +struct mlx5_vport_table { + struct hlist_node hlist; + struct mlx5_flow_table *fdb; + u32 num_rules; + struct mlx5_vport_key key; +}; + +static void +esw_vport_tbl_init(struct mlx5_eswitch *esw, struct esw_vport_tbl_namespace *ns) +{ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + ns->flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); +} + +static struct mlx5_flow_table * +esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns, + const struct esw_vport_tbl_namespace *vport_ns) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb; + + if (vport_ns->max_num_groups) + ft_attr.autogroup.max_num_groups = vport_ns->max_num_groups; + else + ft_attr.autogroup.max_num_groups = esw->params.large_group_num; + ft_attr.max_fte = vport_ns->max_fte; + ft_attr.prio = FDB_PER_VPORT; + ft_attr.flags = vport_ns->flags; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", + PTR_ERR(fdb)); + } + + return fdb; +} + +static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, + struct mlx5_vport_tbl_attr *attr, + struct mlx5_vport_key *key) +{ + key->vport = attr->vport; + key->chain = attr->chain; + key->prio = attr->prio; + key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + key->vport_ns = attr->vport_ns; + return jhash(key, sizeof(*key), 0); +} + +/* caller must hold vports.lock */ +static struct mlx5_vport_table * +esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key) +{ + struct mlx5_vport_table *e; + + hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key) + if (!memcmp(&e->key, skey, sizeof(*skey))) + return e; + + return NULL; +} + +struct mlx5_flow_table * +mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + struct mlx5_vport_table *e; + struct mlx5_vport_key skey; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + esw_vport_tbl_init(esw, attr->vport_ns); + hkey = flow_attr_to_vport_key(esw, attr, &skey); + e = esw_vport_tbl_lookup(esw, &skey, hkey); + if (e) { + e->num_rules++; + goto out; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { + fdb = ERR_PTR(-ENOMEM); + goto err_alloc; + } + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + esw_warn(dev, "Failed to get FDB namespace\n"); + fdb = ERR_PTR(-ENOENT); + goto err_ns; + } + + fdb = esw_vport_tbl_create(esw, ns, attr->vport_ns); + if (IS_ERR(fdb)) + goto err_ns; + + e->fdb = fdb; + e->num_rules = 1; + e->key = skey; + hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return e->fdb; + +err_ns: + kfree(e); +err_alloc: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return fdb; +} + +void +mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) +{ + struct mlx5_vport_table *e; + struct mlx5_vport_key key; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + esw_vport_tbl_init(esw, attr->vport_ns); + hkey = flow_attr_to_vport_key(esw, attr, &key); + e = esw_vport_tbl_lookup(esw, &key, hkey); + if (!e || --e->num_rules) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_table(e->fdb); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 0000000000..3047d7015c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,2398 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "esw/acl/lgcy.h" +#include "esw/legacy.h" +#include "esw/qos.h" +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lag/lag.h" +#include "eswitch.h" +#include "fs_core.h" +#include "devlink.h" +#include "ecpf.h" +#include "en/mod_hdr.h" +#include "en_accel/ipsec.h" + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u16 vport; + struct mlx5_flow_handle *flow_rule; + bool mpfs; /* UC MAC was added to MPFs */ + /* A flag indicating that mac was added due to mc promiscuous vport */ + bool mc_promisc; +}; + +static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) +{ + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EOPNOTSUPP; + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + return 0; +} + +static struct mlx5_eswitch *__mlx5_devlink_eswitch_get(struct devlink *devlink, bool check) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + int err; + + if (check) { + err = mlx5_eswitch_check(dev); + if (err) + return ERR_PTR(err); + } + + return dev->priv.eswitch; +} + +struct mlx5_eswitch *__must_check +mlx5_devlink_eswitch_get(struct devlink *devlink) +{ + return __mlx5_devlink_eswitch_get(devlink, true); +} + +struct mlx5_eswitch *mlx5_devlink_eswitch_nocheck_get(struct devlink *devlink) +{ + return __mlx5_devlink_eswitch_get(devlink, false); +} + +struct mlx5_vport *__must_check +mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + if (!esw) + return ERR_PTR(-EPERM); + + vport = xa_load(&esw->vports, vport_num); + if (!vport) { + esw_debug(esw->dev, "vport out of range: num(0x%x)\n", vport_num); + return ERR_PTR(-EINVAL); + } + return vport; +} + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, + u32 events_mask) +{ + u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {}; + void *nic_vport_ctx; + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport || mlx5_core_is_ecpf(dev)) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & MLX5_VPORT_UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MLX5_VPORT_MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + if (events_mask & MLX5_VPORT_PROMISC_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_promisc_change, 1); + + return mlx5_cmd_exec_in(dev, modify_nic_vport_context, in); +} + +/* E-Switch vport context HW commands */ +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, void *in) +{ + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport); + return mlx5_cmd_exec_in(dev, modify_esw_vport_context, in); +} + +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, + u16 vlan, u8 qos, u8 set_flags) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -EOPNOTSUPP; + + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", + vport, vlan, qos, set_flags); + + if (set_flags & SET_VLAN_STRIP) + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { + if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) { + /* insert either if vlan exist in packet or not */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_ALWAYS); + } else { + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN); + } + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return mlx5_eswitch_modify_esw_vport_context(dev, vport, true, in); +} + +/* E-Switch FDB */ +static struct mlx5_flow_handle * +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, + u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) +{ + int match_header = (is_zero_ether_addr(mac_c) ? 0 : + MLX5_MATCH_OUTER_HEADERS); + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_spec *spec; + void *mv_misc = NULL; + void *mc_misc = NULL; + u8 *dmac_v = NULL; + u8 *dmac_c = NULL; + + if (rx_rule) + match_header |= MLX5_MATCH_MISC_PARAMETERS; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dmac_47_16); + + if (match_header & MLX5_MATCH_OUTER_HEADERS) { + ether_addr_copy(dmac_v, mac_v); + ether_addr_copy(dmac_c, mac_c); + } + + if (match_header & MLX5_MATCH_MISC_PARAMETERS) { + mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK); + MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + spec->match_criteria_enable = match_header; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = + mlx5_add_flow_rules(esw->fdb_table.legacy.fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } + + kvfree(spec); + return flow_rule; +} + +static struct mlx5_flow_handle * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport) +{ + u8 mac_c[ETH_ALEN]; + + eth_broadcast_addr(mac_c); + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac); +} + +static struct mlx5_flow_handle * +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + mac_c[0] = 0x01; + mac_v[0] = 0x01; + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v); +} + +static struct mlx5_flow_handle * +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + u8 *mac = vaddr->node.addr; + u16 vport = vaddr->vport; + int err; + + /* Skip mlx5_mpfs_add_mac for eswitch_managers, + * it is already done by its netdev in mlx5e_execute_l2_action + */ + if (mlx5_esw_is_manager_vport(esw, vport)) + goto fdb_add; + + err = mlx5_mpfs_add_mac(esw->dev, mac); + if (err) { + esw_warn(esw->dev, + "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n", + mac, vport, err); + return err; + } + vaddr->mpfs = true; + +fdb_add: + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) { + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", + vport, mac, vaddr->flow_rule); + } + + return 0; +} + +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + u8 *mac = vaddr->node.addr; + u16 vport = vaddr->vport; + int err = 0; + + /* Skip mlx5_mpfs_del_mac for eswitch managers, + * it is already done by its netdev in mlx5e_execute_l2_action + */ + if (!vaddr->mpfs || mlx5_esw_is_manager_vport(esw, vport)) + goto fdb_del; + + err = mlx5_mpfs_del_mac(esw->dev, mac); + if (err) + esw_warn(esw->dev, + "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + vaddr->mpfs = false; + +fdb_del: + if (vaddr->flow_rule) + mlx5_del_flow_rules(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + return 0; +} + +static void update_allmulti_vports(struct mlx5_eswitch *esw, + struct vport_addr *vaddr, + struct esw_mc_addr *esw_mc) +{ + u8 *mac = vaddr->node.addr; + struct mlx5_vport *vport; + unsigned long i; + u16 vport_num; + + mlx5_esw_for_each_vport(esw, i, vport) { + struct hlist_head *vport_hash = vport->mc_list; + struct vport_addr *iter_vaddr = + l2addr_hash_find(vport_hash, + mac, + struct vport_addr); + vport_num = vport->vport; + if (IS_ERR_OR_NULL(vport->allmulti_rule) || + vaddr->vport == vport_num) + continue; + switch (vaddr->action) { + case MLX5_ACTION_ADD: + if (iter_vaddr) + continue; + iter_vaddr = l2addr_hash_add(vport_hash, mac, + struct vport_addr, + GFP_KERNEL); + if (!iter_vaddr) { + esw_warn(esw->dev, + "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", + mac, vport_num); + continue; + } + iter_vaddr->vport = vport_num; + iter_vaddr->flow_rule = + esw_fdb_set_vport_rule(esw, + mac, + vport_num); + iter_vaddr->mc_promisc = true; + break; + case MLX5_ACTION_DEL: + if (!iter_vaddr) + continue; + mlx5_del_flow_rules(iter_vaddr->flow_rule); + l2addr_hash_del(iter_vaddr); + break; + } + } +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u16 vport = vaddr->vport; + + if (!esw->fdb_table.legacy.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK); + + /* Add this multicast mac to all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + +add: + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't increment the multicast ref count + */ + if (!vaddr->mc_promisc) + esw_mc->refcnt++; + + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u16 vport = vaddr->vport; + + if (!esw->fdb_table.legacy.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); + + if (vaddr->flow_rule) + mlx5_del_flow_rules(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't decrement the multicast ref count. + */ + if (vaddr->mc_promisc || (--esw_mc->refcnt > 0)) + return 0; + + /* Remove this multicast mac from all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + + if (esw_mc->uplink_rule) + mlx5_del_flow_rules(esw_mc->uplink_rule); + + l2addr_hash_del(esw_mc); + return 0; +} + +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, int list_type) +{ + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + vport_addr_add(esw, addr); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + vport_addr_del(esw, addr); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, int list_type) +{ + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = is_uc ? vport->uc_list : vport->mc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + if (!vport->enabled) + goto out; + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport->vport, list_type, + mac_list, &size); + if (err) + goto out; + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport->vport, is_uc ? "UC" : "MC", size); + + for (i = 0; i < size; i++) { + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + /* If this mac was previously added because of allmulti + * promiscuous rx mode, its now converted to be original + * vport mac. + */ + if (addr->mc_promisc) { + struct esw_mc_addr *esw_mc = + l2addr_hash_find(esw->mc_table, + mac_list[i], + struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to MAC(%pM) in mcast DB\n", + mac_list[i]); + continue; + } + esw_mc->refcnt++; + addr->mc_promisc = false; + } + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport->vport); + continue; + } + addr->vport = vport->vport; + addr->action = MLX5_ACTION_ADD; + } +out: + kfree(mac_list); +} + +/* Sync vport UC/MC list from vport context + * Must be called after esw_update_vport_addr_list + */ +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + hash = vport->mc_list; + + for_each_l2hash_node(node, tmp, esw->mc_table, hi) { + u8 *mac = node->addr; + + addr = l2addr_hash_find(hash, mac, struct vport_addr); + if (addr) { + if (addr->action == MLX5_ACTION_DEL) + addr->action = MLX5_ACTION_NONE; + continue; + } + addr = l2addr_hash_add(hash, mac, struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add allmulti MAC(%pM) to vport[%d] DB\n", + mac, vport->vport); + continue; + } + addr->vport = vport->vport; + addr->action = MLX5_ACTION_ADD; + addr->mc_promisc = true; + } +} + +/* Apply vport rx mode to HW FDB table */ +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + bool promisc, bool mc_promisc) +{ + struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; + + if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) + goto promisc; + + if (mc_promisc) { + vport->allmulti_rule = + esw_fdb_set_vport_allmulti_rule(esw, vport->vport); + if (!allmulti_addr->uplink_rule) + allmulti_addr->uplink_rule = + esw_fdb_set_vport_allmulti_rule(esw, + MLX5_VPORT_UPLINK); + allmulti_addr->refcnt++; + } else if (vport->allmulti_rule) { + mlx5_del_flow_rules(vport->allmulti_rule); + vport->allmulti_rule = NULL; + + if (--allmulti_addr->refcnt > 0) + goto promisc; + + if (allmulti_addr->uplink_rule) + mlx5_del_flow_rules(allmulti_addr->uplink_rule); + allmulti_addr->uplink_rule = NULL; + } + +promisc: + if (IS_ERR_OR_NULL(vport->promisc_rule) != promisc) + return; + + if (promisc) { + vport->promisc_rule = + esw_fdb_set_vport_promisc_rule(esw, vport->vport); + } else if (vport->promisc_rule) { + mlx5_del_flow_rules(vport->promisc_rule); + vport->promisc_rule = NULL; + } +} + +/* Sync vport rx mode from vport context */ +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int promisc_all = 0; + int promisc_uc = 0; + int promisc_mc = 0; + int err; + + err = mlx5_query_nic_vport_promisc(esw->dev, + vport->vport, + &promisc_uc, + &promisc_mc, + &promisc_all); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", + vport->vport, promisc_all, promisc_mc); + + if (!vport->info.trusted || !vport->enabled) { + promisc_uc = 0; + promisc_mc = 0; + promisc_all = 0; + } + + esw_apply_vport_rx_mode(esw, vport, promisc_all, + (promisc_all || promisc_mc)); +} + +void esw_vport_change_handle_locked(struct mlx5_vport *vport) +{ + struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u8 mac[ETH_ALEN]; + + if (!MLX5_CAP_GEN(dev, log_max_l2_table)) + return; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & MLX5_VPORT_UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_UC); + } + + if (vport->enabled_events & MLX5_VPORT_MC_ADDR_CHANGE) + esw_update_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); + + if (vport->enabled_events & MLX5_VPORT_PROMISC_CHANGE) { + esw_update_vport_rx_mode(esw, vport); + if (!IS_ERR_OR_NULL(vport->allmulti_rule)) + esw_update_vport_mc_promisc(esw, vport); + } + + if (vport->enabled_events & (MLX5_VPORT_PROMISC_CHANGE | MLX5_VPORT_MC_ADDR_CHANGE)) + esw_apply_vport_addr_list(esw, vport, MLX5_NVPRT_LIST_TYPE_MC); + + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + vport->enabled_events); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + mutex_lock(&esw->state_lock); + esw_vport_change_handle_locked(vport); + mutex_unlock(&esw->state_lock); +} + +static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + +static int esw_vport_setup_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (esw->mode == MLX5_ESWITCH_LEGACY) + return esw_legacy_vport_acl_setup(esw, vport); + else + return esw_vport_create_offloads_acl_tables(esw, vport); +} + +static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_legacy_vport_acl_cleanup(esw, vport); + else + esw_vport_destroy_offloads_acl_tables(esw, vport); +} + +static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + return 0; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, + MLX5_CAP_GENERAL); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce); + + if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2)) + goto out_free; + + memset(query_ctx, 0, query_out_sz); + err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, + MLX5_CAP_GENERAL_2); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable); + + err = mlx5_esw_ipsec_vf_offload_get(esw->dev, vport); +out_free: + kfree(query_ctx); + return err; +} + +static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + bool vst_mode_steering = esw_vst_mode_is_steering(esw); + u16 vport_num = vport->vport; + int flags; + int err; + + err = esw_vport_setup_acl(esw, vport); + if (err) + return err; + + if (mlx5_esw_is_manager_vport(esw, vport_num)) + return 0; + + err = mlx5_esw_vport_caps_get(esw, vport); + if (err) + goto err_caps; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + vport->info.link_state); + + /* Host PF has its own mac/guid. */ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, + vport->info.qos, flags); + + return 0; + +err_caps: + esw_vport_cleanup_acl(esw, vport); + return err; +} + +/* Don't cleanup vport->info, it's needed to restore vport configuration */ +static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + u16 vport_num = vport->vport; + + if (!mlx5_esw_is_manager_vport(esw, vport_num)) + mlx5_modify_vport_admin_state(esw->dev, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, + vport_num, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); + + mlx5_esw_qos_vport_disable(esw, vport); + esw_vport_cleanup_acl(esw, vport); +} + +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + enum mlx5_eswitch_vport_event enabled_events) +{ + u16 vport_num = vport->vport; + int ret; + + mutex_lock(&esw->state_lock); + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + + ret = esw_vport_setup(esw, vport); + if (ret) + goto done; + + /* Sync with current vport context */ + vport->enabled_events = enabled_events; + vport->enabled = true; + if (vport->vport != MLX5_VPORT_PF && + (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled)) + esw->enabled_ipsec_vf_count++; + + /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well + * in smartNIC as it's a vport group manager. + */ + if (mlx5_esw_is_manager_vport(esw, vport_num) || + (!vport_num && mlx5_core_is_ecpf(esw->dev))) + vport->info.trusted = true; + + if (!mlx5_esw_is_manager_vport(esw, vport_num) && + MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + ret = mlx5_esw_vport_vhca_id_set(esw, vport_num); + if (ret) + goto err_vhca_mapping; + } + + /* External controller host PF has factory programmed MAC. + * Read it from the device. + */ + if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) + mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac); + + esw_vport_change_handle_locked(vport); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +done: + mutex_unlock(&esw->state_lock); + return ret; + +err_vhca_mapping: + esw_vport_cleanup(esw, vport); + mutex_unlock(&esw->state_lock); + return ret; +} + +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + u16 vport_num = vport->vport; + + mutex_lock(&esw->state_lock); + + if (!vport->enabled) + goto done; + + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); + /* Mark this vport as disabled to discard new events */ + vport->enabled = false; + + /* Disable events from this vport */ + if (MLX5_CAP_GEN(esw->dev, log_max_l2_table)) + arm_vport_context_events_cmd(esw->dev, vport_num, 0); + + if (!mlx5_esw_is_manager_vport(esw, vport_num) && + MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + mlx5_esw_vport_vhca_id_clear(esw, vport_num); + + if (vport->vport != MLX5_VPORT_PF && + (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled)) + esw->enabled_ipsec_vf_count--; + + /* We don't assume VFs will cleanup after themselves. + * Calling vport change handler while vport is disabled will cleanup + * the vport resources. + */ + esw_vport_change_handle_locked(vport); + vport->enabled_events = 0; + esw_apply_vport_rx_mode(esw, vport, false, false); + esw_vport_cleanup(esw, vport); + esw->enabled_vports--; + +done: + mutex_unlock(&esw->state_lock); +} + +static int eswitch_vport_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb); + struct mlx5_eqe *eqe = data; + struct mlx5_vport *vport; + u16 vport_num; + + vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (!IS_ERR(vport)) + queue_work(esw->work_queue, &vport->vport_change_handler); + return NOTIFY_OK; +} + +/** + * mlx5_esw_query_functions - Returns raw output about functions state + * @dev: Pointer to device to query + * + * mlx5_esw_query_functions() allocates and returns functions changed + * raw output memory pointer from device on success. Otherwise returns ERR_PTR. + * Caller must free the memory using kvfree() when valid pointer is returned. + */ +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return ERR_PTR(-ENOMEM); + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + if (!err) + return out; + + kvfree(out); + return ERR_PTR(err); +} + +static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) +{ + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { + MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } +} + +static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) +{ + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + + flush_workqueue(esw->work_queue); +} + +static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + memset(&vport->qos, 0, sizeof(vport->qos)); + memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } +} + +static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + memset(&vport->qos, 0, sizeof(vport->qos)); + memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } +} + +static int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + enum mlx5_eswitch_vport_event enabled_events) +{ + int err; + + err = mlx5_esw_vport_enable(esw, vport, enabled_events); + if (err) + return err; + + err = mlx5_esw_offloads_load_rep(esw, vport); + if (err) + goto err_rep; + + return err; + +err_rep: + mlx5_esw_vport_disable(esw, vport); + return err; +} + +static void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + mlx5_esw_offloads_unload_rep(esw, vport); + mlx5_esw_vport_disable(esw, vport); +} + +static int mlx5_eswitch_load_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + err = mlx5_esw_offloads_init_pf_vf_rep(esw, vport); + if (err) + return err; + + err = mlx5_eswitch_load_vport(esw, vport, enabled_events); + if (err) + goto err_load; + return 0; + +err_load: + mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport); + return err; +} + +static void mlx5_eswitch_unload_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + mlx5_eswitch_unload_vport(esw, vport); + mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport); +} + +int mlx5_eswitch_load_sf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events, + struct mlx5_devlink_port *dl_port, u32 controller, u32 sfnum) +{ + struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + err = mlx5_esw_offloads_init_sf_rep(esw, vport, dl_port, controller, sfnum); + if (err) + return err; + + err = mlx5_eswitch_load_vport(esw, vport, enabled_events); + if (err) + goto err_load; + + return 0; + +err_load: + mlx5_esw_offloads_cleanup_sf_rep(esw, vport); + return err; +} + +void mlx5_eswitch_unload_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + + mlx5_eswitch_unload_vport(esw, vport); + mlx5_esw_offloads_cleanup_sf_rep(esw, vport); +} + +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { + if (!vport->enabled) + continue; + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); + } +} + +static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, + u16 num_ec_vfs) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + if (!vport->enabled) + continue; + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); + } +} + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_vf_vports(esw, num_vfs); + return err; +} + +static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_ec_vf_vports(esw, num_ec_vfs); + return err; +} + +static int host_pf_enable_hca(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return 0; + + /* Once vport and representor are ready, take out the external host PF + * out of initializing state. Enabling HCA clears the iser->initializing + * bit and host PF driver loading can progress. + */ + return mlx5_cmd_host_pf_enable_hca(dev); +} + +static void host_pf_disable_hca(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_cmd_host_pf_disable_hca(dev); +} + +/* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs + * whichever are present on the eswitch. + */ +int +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events) +{ + bool pf_needed; + int ret; + + pf_needed = mlx5_core_is_ecpf_esw_manager(esw->dev) || + esw->mode == MLX5_ESWITCH_LEGACY; + + /* Enable PF vport */ + if (pf_needed) { + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, + enabled_events); + if (ret) + return ret; + } + + /* Enable external host PF HCA */ + ret = host_pf_enable_hca(esw->dev); + if (ret) + goto pf_hca_err; + + /* Enable ECPF vport */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); + if (ret) + goto ecpf_err; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; + } + } + + /* Enable VF vports */ + ret = mlx5_eswitch_load_vf_vports(esw, esw->esw_funcs.num_vfs, + enabled_events); + if (ret) + goto vf_err; + return 0; + +vf_err: + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); +ec_vf_err: + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); +ecpf_err: + host_pf_disable_hca(esw->dev); +pf_hca_err: + if (pf_needed) + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); + return ret; +} + +/* mlx5_eswitch_disable_pf_vf_vports() disables vports of PF, ECPF and VFs + * whichever are previously enabled on the eswitch. + */ +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) +{ + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + + if (mlx5_ecpf_vport_exists(esw->dev)) { + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); + } + + host_pf_disable_hca(esw->dev); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); +} + +static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + &val); + if (!err) { + esw->params.large_group_num = val.vu32; + } else { + esw_warn(esw->dev, + "Devlink can't get param fdb_large_groups, uses default (%d).\n", + ESW_OFFLOADS_DEFAULT_NUM_GROUPS); + esw->params.large_group_num = ESW_OFFLOADS_DEFAULT_NUM_GROUPS; + } +} + +static void +mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) +{ + const u32 *out; + + if (num_vfs < 0) + return; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + esw->esw_funcs.num_ec_vfs = num_vfs; + + kvfree(out); +} + +static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode) +{ + struct mlx5_esw_event_info info = {}; + + info.new_mode = mode; + + blocking_notifier_call_chain(&esw->n_head, 0, &info); +} + +static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + int total_vports; + int err; + + if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED) + return 0; + + total_vports = mlx5_eswitch_get_total_vports(dev); + + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = mlx5_fs_egress_acls_init(dev, total_vports); + if (err) + return err; + } else { + esw_warn(dev, "egress ACL is not supported by FW\n"); + } + + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = mlx5_fs_ingress_acls_init(dev, total_vports); + if (err) + goto err; + } else { + esw_warn(dev, "ingress ACL is not supported by FW\n"); + } + esw->flags |= MLX5_ESWITCH_VPORT_ACL_NS_CREATED; + return 0; + +err: + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + mlx5_fs_egress_acls_cleanup(dev); + return err; +} + +static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + + esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED; + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + mlx5_fs_ingress_acls_cleanup(dev); + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + mlx5_fs_egress_acls_cleanup(dev); +} + +/** + * mlx5_eswitch_enable_locked - Enable eswitch + * @esw: Pointer to eswitch + * @num_vfs: Enable eswitch for given number of VFs. This is optional. + * Valid value are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS. + * Caller should pass num_vfs > 0 when enabling eswitch for + * vf vports. Caller should pass num_vfs = 0, when eswitch + * is enabled without sriov VFs or when caller + * is unaware of the sriov state of the host PF on ECPF based + * eswitch. Caller should pass < 0 when num_vfs should be + * completely ignored. This is typically the case when eswitch + * is enabled without sriov regardless of PF/ECPF system. + * mlx5_eswitch_enable_locked() Enables eswitch in either legacy or offloads + * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports. + * It returns 0 on success or error code on failure. + */ +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) +{ + int err; + + devl_assert_locked(priv_to_devlink(esw->dev)); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); + return -EOPNOTSUPP; + } + + mlx5_eswitch_get_devlink_param(esw); + + err = mlx5_esw_acls_ns_init(esw); + if (err) + return err; + + mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + + if (esw->mode == MLX5_ESWITCH_LEGACY) { + err = esw_legacy_enable(esw); + } else { + mlx5_rescan_drivers(esw->dev); + err = esw_offloads_enable(esw); + } + + if (err) + goto abort; + + esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; + + mlx5_eswitch_event_handler_register(esw); + + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); + + mlx5_esw_mode_change_notify(esw, esw->mode); + + return 0; + +abort: + mlx5_esw_acls_ns_cleanup(esw); + return err; +} + +/** + * mlx5_eswitch_enable - Enable eswitch + * @esw: Pointer to eswitch + * @num_vfs: Enable eswitch switch for given number of VFs. + * Caller must pass num_vfs > 0 when enabling eswitch for + * vf vports. + * mlx5_eswitch_enable() returns 0 on success or error code on failure. + */ +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) +{ + bool toggle_lag; + int ret = 0; + + if (!mlx5_esw_allowed(esw)) + return 0; + + devl_assert_locked(priv_to_devlink(esw->dev)); + + toggle_lag = !mlx5_esw_is_fdb_created(esw); + + if (toggle_lag) + mlx5_lag_disable_change(esw->dev); + + if (!mlx5_esw_is_fdb_created(esw)) { + ret = mlx5_eswitch_enable_locked(esw, num_vfs); + } else { + enum mlx5_eswitch_vport_event vport_events; + + vport_events = (esw->mode == MLX5_ESWITCH_LEGACY) ? + MLX5_LEGACY_SRIOV_VPORT_EVENTS : MLX5_VPORT_UC_ADDR_CHANGE; + /* If this is the ECPF the number of host VFs is managed via the + * eswitch function change event handler, and any num_vfs provided + * here are intended to be EC VFs. + */ + if (!mlx5_core_is_ecpf(esw->dev)) { + ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_vfs = num_vfs; + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_ec_vfs = num_vfs; + } + } + + if (toggle_lag) + mlx5_lag_enable_change(esw->dev); + + return ret; +} + +/* When disabling sriov, free driver level resources. */ +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) +{ + if (!mlx5_esw_allowed(esw)) + return; + + devl_assert_locked(priv_to_devlink(esw->dev)); + /* If driver is unloaded, this function is called twice by remove_one() + * and mlx5_unload(). Prevent the second call. + */ + if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) + return; + + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); + + if (!mlx5_core_is_ecpf(esw->dev)) { + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); + if (clear_vf) + mlx5_eswitch_clear_ec_vf_vports_info(esw); + } + + if (esw->mode == MLX5_ESWITCH_OFFLOADS) { + struct devlink *devlink = priv_to_devlink(esw->dev); + + devl_rate_nodes_destroy(devlink); + } + /* Destroy legacy fdb when disabling sriov in legacy mode. */ + if (esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_disable_locked(esw); + + if (!mlx5_core_is_ecpf(esw->dev)) + esw->esw_funcs.num_vfs = 0; + else + esw->esw_funcs.num_ec_vfs = 0; +} + +/* Free resources for corresponding eswitch mode. It is called by devlink + * when changing eswitch mode or modprobe when unloading driver. + */ +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + + /* Notify eswitch users that it is exiting from current mode. + * So that it can do necessary cleanup before the eswitch is disabled. + */ + mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); + + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handler_unregister(esw); + + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); + + if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) { + esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + esw_offloads_disable(esw); + else if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_legacy_disable(esw); + mlx5_esw_acls_ns_cleanup(esw); + } + + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + devl_rate_nodes_destroy(devlink); +} + +void mlx5_eswitch_disable(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_allowed(esw)) + return; + + devl_assert_locked(priv_to_devlink(esw->dev)); + mlx5_lag_disable_change(esw->dev); + mlx5_eswitch_disable_locked(esw); + esw->mode = MLX5_ESWITCH_LEGACY; + mlx5_lag_enable_change(esw->dev); +} + +static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out) +{ + u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + MLX5_SET(query_hca_cap_in, in, function_id, MLX5_VPORT_PF); + MLX5_SET(query_hca_cap_in, in, other_function, true); + return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); +} + +int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id) + +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + if (!mlx5_core_is_ecpf(dev)) { + *max_sfs = 0; + return 0; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_query_hca_cap_host_pf(dev, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *max_sfs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_sf); + *sf_base_id = MLX5_GET(cmd_hca_cap, hca_caps, sf_base_id); + +out_free: + kfree(query_ctx); + return err; +} + +static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, + int index, u16 vport_num) +{ + struct mlx5_vport *vport; + int err; + + vport = kzalloc(sizeof(*vport), GFP_KERNEL); + if (!vport) + return -ENOMEM; + + vport->dev = esw->dev; + vport->vport = vport_num; + vport->index = index; + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); + err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL); + if (err) + goto insert_err; + + esw->total_vports++; + return 0; + +insert_err: + kfree(vport); + return err; +} + +static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + xa_erase(&esw->vports, vport->vport); + kfree(vport); +} + +static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vport(esw, i, vport) + mlx5_esw_vport_free(esw, vport); + xa_destroy(&esw->vports); +} + +static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + u16 max_host_pf_sfs; + u16 base_sf_num; + int idx = 0; + int err; + int i; + + xa_init(&esw->vports); + + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF); + if (err) + goto err; + if (esw->first_host_vport == MLX5_VPORT_PF) + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + idx++; + + for (i = 0; i < mlx5_core_max_vfs(dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, idx); + if (err) + goto err; + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + idx++; + } + base_sf_num = mlx5_sf_start_function_id(dev); + for (i = 0; i < mlx5_sf_max_functions(dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i); + if (err) + goto err; + xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); + idx++; + } + + err = mlx5_esw_sf_max_hpf_functions(dev, &max_host_pf_sfs, &base_sf_num); + if (err) + goto err; + for (i = 0; i < max_host_pf_sfs; i++) { + err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i); + if (err) + goto err; + xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); + idx++; + } + + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + int ec_vf_base_num = mlx5_core_ec_vf_vport_base(dev); + + for (i = 0; i < mlx5_core_max_ec_vfs(esw->dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, ec_vf_base_num + i); + if (err) + goto err; + idx++; + } + } + + if (mlx5_ecpf_vport_exists(dev) || + mlx5_core_is_ecpf_esw_manager(dev)) { + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF); + if (err) + goto err; + idx++; + } + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_UPLINK); + if (err) + goto err; + return 0; + +err: + mlx5_esw_vports_cleanup(esw); + return err; +} + +static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + if (ctx->val.vbool) + return mlx5_lag_mpesw_enable(dev); + + mlx5_lag_mpesw_disable(dev); + return 0; +} + +static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_lag_is_mpesw(dev); + return 0; +} + +static const struct devlink_param mlx5_eswitch_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_multiport_get, + mlx5_devlink_esw_multiport_set, NULL), +}; + +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + int err; + + if (!MLX5_VPORT_MANAGER(dev) && !MLX5_ESWITCH_MANAGER(dev)) + return 0; + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + err = devl_params_register(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); + if (err) + goto free_esw; + + esw->dev = dev; + esw->manager_vport = mlx5_eswitch_manager_vport(dev); + esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); + + esw->debugfs_root = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(dev)); + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + err = mlx5_esw_vports_init(esw); + if (err) + goto abort; + + err = esw_offloads_init(esw); + if (err) + goto reps_err; + + mutex_init(&esw->offloads.encap_tbl_lock); + hash_init(esw->offloads.encap_tbl); + mutex_init(&esw->offloads.decap_tbl_lock); + hash_init(esw->offloads.decap_tbl); + mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr); + atomic64_set(&esw->offloads.num_flows, 0); + ida_init(&esw->offloads.vport_metadata_ida); + xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); + mutex_init(&esw->state_lock); + init_rwsem(&esw->mode_lock); + refcount_set(&esw->qos.refcnt, 0); + + esw->enabled_vports = 0; + esw->mode = MLX5_ESWITCH_LEGACY; + esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + if (MLX5_ESWITCH_MANAGER(dev) && + mlx5_esw_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + dev->priv.eswitch = esw; + BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); + + esw_info(dev, + "Total vports %d, per vport: max uc(%d) max mc(%d)\n", + esw->total_vports, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + return 0; + +reps_err: + mlx5_esw_vports_cleanup(esw); +abort: + if (esw->work_queue) + destroy_workqueue(esw->work_queue); + debugfs_remove_recursive(esw->debugfs_root); + devl_params_unregister(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); +free_esw: + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw) + return; + + esw_info(esw->dev, "cleanup\n"); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + WARN_ON(refcount_read(&esw->qos.refcnt)); + mutex_destroy(&esw->state_lock); + WARN_ON(!xa_empty(&esw->offloads.vhca_map)); + xa_destroy(&esw->offloads.vhca_map); + ida_destroy(&esw->offloads.vport_metadata_ida); + mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr); + mutex_destroy(&esw->offloads.encap_tbl_lock); + mutex_destroy(&esw->offloads.decap_tbl_lock); + esw_offloads_cleanup(esw); + mlx5_esw_vports_cleanup(esw); + debugfs_remove_recursive(esw->debugfs_root); + devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); + kfree(esw); +} + +/* Vport Administration */ +static int +mlx5_esw_set_vport_mac_locked(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, const u8 *mac) +{ + u16 vport_num = evport->vport; + u64 node_guid; + int err = 0; + + if (is_multicast_ether_addr(mac)) + return -EINVAL; + + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) + mlx5_core_warn(esw->dev, + "Set invalid MAC while spoofchk is on, vport(%d)\n", + vport_num); + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", + vport_num, err); + return err; + } + + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, node_guid); + if (err) + mlx5_core_warn(esw->dev, + "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", + vport_num, err); + + ether_addr_copy(evport->info.mac, mac); + evport->info.node_guid = node_guid; + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) + err = esw_acl_ingress_lgcy_setup(esw, evport); + + return err; +} + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + u16 vport, const u8 *mac) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err = 0; + + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + err = mlx5_esw_set_vport_mac_locked(esw, evport, mac); + mutex_unlock(&esw->state_lock); + return err; +} + +static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark) +{ + return xa_get_mark(&esw->vports, vport_num, mark); +} + +bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF); +} + +bool mlx5_eswitch_is_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(esw, vport_num); +} + +bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF); +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + u16 vport, int link_state) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int opmod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT; + int other_vport = 1; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + if (vport == MLX5_VPORT_UPLINK) { + opmod = MLX5_VPORT_STATE_OP_MOD_UPLINK; + other_vport = 0; + vport = 0; + } + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + + err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state); + if (err) { + mlx5_core_warn(esw->dev, "Failed to set vport %d link state, opmod = %d, err = %d", + vport, opmod, err); + goto unlock; + } + + evport->info.link_state = link_state; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + u16 vport, struct ifla_vf_info *ivi) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + + if (IS_ERR(evport)) + return PTR_ERR(evport); + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mutex_lock(&esw->state_lock); + ether_addr_copy(ivi->mac, evport->info.mac); + ivi->linkstate = evport->info.link_state; + ivi->vlan = evport->info.vlan; + ivi->qos = evport->info.qos; + ivi->spoofchk = evport->info.spoofchk; + ivi->trusted = evport->info.trusted; + if (evport->qos.enabled) { + ivi->min_tx_rate = evport->qos.min_rate; + ivi->max_tx_rate = evport->qos.max_rate; + } + mutex_unlock(&esw->state_lock); + + return 0; +} + +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos, u8 set_flags) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool vst_mode_steering = esw_vst_mode_is_steering(esw); + int err = 0; + + if (IS_ERR(evport)) + return PTR_ERR(evport); + if (vlan > 4095 || qos > 7) + return -EINVAL; + + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) { + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); + if (err) + return err; + } + + evport->info.vlan = vlan; + evport->info.qos = qos; + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { + err = esw_acl_ingress_lgcy_setup(esw, evport); + if (err) + return err; + err = esw_acl_egress_lgcy_setup(esw, evport); + } + + return err; +} + +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + u16 vport_num, + struct ifla_vf_stats *vf_stats) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {}; + struct mlx5_vport_drop_stats stats = {}; + int err = 0; + u32 *out; + + if (IS_ERR(vport)) + return PTR_ERR(vport); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport); + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + + err = mlx5_cmd_exec_inout(esw->dev, query_vport_counter, in, out); + if (err) + goto free_out; + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_ib_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_ib_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_ib_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + err = mlx5_esw_query_vport_drop_stats(esw->dev, vport, &stats); + if (err) + goto free_out; + vf_stats->rx_dropped = stats.rx_dropped; + vf_stats->tx_dropped = stats.tx_dropped; + +free_out: + kvfree(out); + return err; +} + +u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_LEGACY; +} +EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); + +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + +bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); +} + +int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&esw->n_head, nb); +} + +void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&esw->n_head, nb); +} + +/** + * mlx5_esw_hold() - Try to take a read lock on esw mode lock. + * @mdev: mlx5 core device. + * + * Should be called by esw resources callers. + * + * Return: true on success or false. + */ +bool mlx5_esw_hold(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + /* e.g. VF doesn't have eswitch so nothing to do */ + if (!mlx5_esw_allowed(esw)) + return true; + + if (down_read_trylock(&esw->mode_lock) != 0) { + if (esw->eswitch_operation_in_progress) { + up_read(&esw->mode_lock); + return false; + } + return true; + } + + return false; +} + +/** + * mlx5_esw_release() - Release a read lock on esw mode lock. + * @mdev: mlx5 core device. + */ +void mlx5_esw_release(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + up_read(&esw->mode_lock); +} + +/** + * mlx5_esw_get() - Increase esw user count. + * @mdev: mlx5 core device. + */ +void mlx5_esw_get(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + atomic64_inc(&esw->user_count); +} + +/** + * mlx5_esw_put() - Decrease esw user count. + * @mdev: mlx5 core device. + */ +void mlx5_esw_put(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + atomic64_dec_if_positive(&esw->user_count); +} + +/** + * mlx5_esw_try_lock() - Take a write lock on esw mode lock. + * @esw: eswitch device. + * + * Should be called by esw mode change routine. + * + * Return: + * * 0 - esw mode if successfully locked and refcount is 0. + * * -EBUSY - refcount is not 0. + * * -EINVAL - In the middle of switching mode or lock is already held. + */ +int mlx5_esw_try_lock(struct mlx5_eswitch *esw) +{ + if (down_write_trylock(&esw->mode_lock) == 0) + return -EINVAL; + + if (esw->eswitch_operation_in_progress || + atomic64_read(&esw->user_count) > 0) { + up_write(&esw->mode_lock); + return -EBUSY; + } + + return esw->mode; +} + +int mlx5_esw_lock(struct mlx5_eswitch *esw) +{ + down_write(&esw->mode_lock); + + if (esw->eswitch_operation_in_progress) { + up_write(&esw->mode_lock); + return -EBUSY; + } + + return 0; +} + +/** + * mlx5_esw_unlock() - Release write lock on esw mode lock + * @esw: eswitch device. + */ +void mlx5_esw_unlock(struct mlx5_eswitch *esw) +{ + up_write(&esw->mode_lock); +} + +/** + * mlx5_eswitch_get_total_vports - Get total vports of the eswitch + * + * @dev: Pointer to core device + * + * mlx5_eswitch_get_total_vports returns total number of eswitch vports. + */ +u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return mlx5_esw_allowed(esw) ? esw->total_vports : 0; +} +EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); + +/** + * mlx5_eswitch_get_core_dev - Get the mdev device + * @esw : eswitch device. + * + * Return the mellanox core device which manages the eswitch. + */ +struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw) +{ + return mlx5_esw_allowed(esw) ? esw->dev : NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_core_dev); + +bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + return true; + + mutex_lock(&esw->state_lock); + if (esw->enabled_ipsec_vf_count) { + mutex_unlock(&esw->state_lock); + return false; + } + + dev->num_ipsec_offloads++; + mutex_unlock(&esw->state_lock); + return true; +} + +void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + /* Failure means no eswitch => core dev is not a PF */ + return; + + mutex_lock(&esw->state_lock); + dev->num_ipsec_offloads--; + mutex_unlock(&esw->state_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 0000000000..b4eb17141e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,959 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lib/mpfs.h" +#include "lib/fs_chains.h" +#include "sf/sf.h" +#include "en/tc_ct.h" +#include "en/tc/sample.h" + +enum mlx5_mapped_obj_type { + MLX5_MAPPED_OBJ_CHAIN, + MLX5_MAPPED_OBJ_SAMPLE, + MLX5_MAPPED_OBJ_INT_PORT_METADATA, + MLX5_MAPPED_OBJ_ACT_MISS, +}; + +struct mlx5_mapped_obj { + enum mlx5_mapped_obj_type type; + union { + u32 chain; + u64 act_miss_cookie; + struct { + u32 group_id; + u32 rate; + u32 trunc_size; + u32 tunnel_id; + } sample; + u32 int_port_metadata; + }; +}; + +#ifdef CONFIG_MLX5_ESWITCH + +#define ESW_OFFLOADS_DEFAULT_NUM_GROUPS 15 + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define mlx5_esw_has_fwd_fdb(dev) \ + MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) + +#define esw_chains(esw) \ + ((esw)->fdb_table.offloads.esw_chains_priv) + +enum { + MAPPING_TYPE_CHAIN, + MAPPING_TYPE_TUNNEL, + MAPPING_TYPE_TUNNEL_ENC_OPTS, + MAPPING_TYPE_LABELS, + MAPPING_TYPE_ZONE, + MAPPING_TYPE_INT_PORT, +}; + +struct vport_ingress { + struct mlx5_flow_table *acl; + struct mlx5_flow_handle *allow_rule; + struct { + struct mlx5_flow_group *allow_spoofchk_only_grp; + struct mlx5_flow_group *allow_untagged_spoofchk_grp; + struct mlx5_flow_group *allow_untagged_only_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; + struct { + /* Optional group to add an FTE to do internal priority + * tagging on ingress packets. + */ + struct mlx5_flow_group *metadata_prio_tag_grp; + /* Group to add default match-all FTE entry to tag ingress + * packet with metadata. + */ + struct mlx5_flow_group *metadata_allmatch_grp; + /* Optional group to add a drop all rule */ + struct mlx5_flow_group *drop_grp; + struct mlx5_modify_hdr *modify_metadata; + struct mlx5_flow_handle *modify_metadata_rule; + struct mlx5_flow_handle *drop_rule; + } offloads; +}; + +enum vport_egress_acl_type { + VPORT_EGRESS_ACL_TYPE_DEFAULT, + VPORT_EGRESS_ACL_TYPE_SHARED_FDB, +}; + +struct vport_egress { + struct mlx5_flow_table *acl; + enum vport_egress_acl_type type; + struct mlx5_flow_handle *allowed_vlan; + struct mlx5_flow_group *vlan_grp; + union { + struct { + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; + struct { + struct mlx5_flow_group *fwd_grp; + struct mlx5_flow_handle *fwd_rule; + struct xarray bounce_rules; + struct mlx5_flow_group *bounce_grp; + } offloads; + }; +}; + +struct mlx5_vport_drop_stats { + u64 rx_dropped; + u64 tx_dropped; +}; + +struct mlx5_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + u64 node_guid; + int link_state; + u8 qos; + u8 spoofchk: 1; + u8 trusted: 1; + u8 roce_enabled: 1; + u8 mig_enabled: 1; + u8 ipsec_crypto_enabled: 1; + u8 ipsec_packet_enabled: 1; +}; + +/* Vport context events */ +enum mlx5_eswitch_vport_event { + MLX5_VPORT_UC_ADDR_CHANGE = BIT(0), + MLX5_VPORT_MC_ADDR_CHANGE = BIT(1), + MLX5_VPORT_PROMISC_CHANGE = BIT(3), +}; + +struct mlx5_vport; + +struct mlx5_devlink_port { + struct devlink_port dl_port; + struct mlx5_vport *vport; +}; + +static inline void mlx5_devlink_port_init(struct mlx5_devlink_port *dl_port, + struct mlx5_vport *vport) +{ + dl_port->vport = vport; +} + +static inline struct mlx5_devlink_port *mlx5_devlink_port_get(struct devlink_port *dl_port) +{ + return container_of(dl_port, struct mlx5_devlink_port, dl_port); +} + +static inline struct mlx5_vport *mlx5_devlink_port_vport_get(struct devlink_port *dl_port) +{ + return mlx5_devlink_port_get(dl_port)->vport; +} + +struct mlx5_vport { + struct mlx5_core_dev *dev; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct mlx5_flow_handle *promisc_rule; + struct mlx5_flow_handle *allmulti_rule; + struct work_struct vport_change_handler; + + struct vport_ingress ingress; + struct vport_egress egress; + u32 default_metadata; + u32 metadata; + + struct mlx5_vport_info info; + + struct { + bool enabled; + u32 esw_tsar_ix; + u32 bw_share; + u32 min_rate; + u32 max_rate; + struct mlx5_esw_rate_group *group; + } qos; + + u16 vport; + bool enabled; + enum mlx5_eswitch_vport_event enabled_events; + int index; + struct mlx5_devlink_port *dl_port; +}; + +struct mlx5_esw_indir_table; + +struct mlx5_eswitch_fdb { + union { + struct legacy_fdb { + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *addr_grp; + struct mlx5_flow_group *allmulti_grp; + struct mlx5_flow_group *promisc_grp; + struct mlx5_flow_table *vepa_fdb; + struct mlx5_flow_handle *vepa_uplink_rule; + struct mlx5_flow_handle *vepa_star_rule; + } legacy; + + struct offloads_fdb { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *tc_miss_table; + struct mlx5_flow_table *slow_fdb; + struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *send_to_vport_meta_grp; + struct mlx5_flow_group *peer_miss_grp; + struct mlx5_flow_handle **peer_miss_rules[MLX5_MAX_PORTS]; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle **send_to_vport_meta_rules; + struct mlx5_flow_handle *miss_rule_uni; + struct mlx5_flow_handle *miss_rule_multi; + + struct mlx5_fs_chains *esw_chains_priv; + struct { + DECLARE_HASHTABLE(table, 8); + /* Protects vports.table */ + struct mutex lock; + } vports; + + struct mlx5_esw_indir_table *indir; + + } offloads; + }; + u32 flags; +}; + +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads_restore; + struct mlx5_flow_group *restore_group; + struct mlx5_modify_hdr *restore_copy_hdr_id; + struct mapping_ctx *reg_c0_obj_pool; + + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; + struct mlx5_flow_group *vport_rx_drop_group; + struct mlx5_flow_handle *vport_rx_drop_rule; + struct mlx5_flow_table *ft_ipsec_tx_pol; + struct xarray vport_reps; + struct list_head peer_flows[MLX5_MAX_PORTS]; + struct mutex peer_mutex; + struct mutex encap_tbl_lock; /* protects encap_tbl */ + DECLARE_HASHTABLE(encap_tbl, 8); + struct mutex decap_tbl_lock; /* protects decap_tbl */ + DECLARE_HASHTABLE(decap_tbl, 8); + struct mod_hdr_tbl mod_hdr; + DECLARE_HASHTABLE(termtbl_tbl, 8); + struct mutex termtbl_mutex; /* protects termtbl hash */ + struct xarray vhca_map; + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; + u8 inline_mode; + atomic64_t num_flows; + u64 num_block_encap; + u64 num_block_mode; + enum devlink_eswitch_encap_mode encap; + struct ida vport_metadata_ida; + unsigned int host_number; /* ECPF supports one external host */ +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +struct mlx5_host_work { + struct work_struct work; + struct mlx5_eswitch *esw; +}; + +struct mlx5_esw_functions { + struct mlx5_nb nb; + u16 num_vfs; + u16 num_ec_vfs; +}; + +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), + MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1), + MLX5_ESWITCH_VPORT_ACL_NS_CREATED = BIT(2), +}; + +struct mlx5_esw_bridge_offloads; + +enum { + MLX5_ESW_FDB_CREATED = BIT(0), +}; + +struct dentry; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct mlx5_eswitch_fdb fdb_table; + /* legacy data structures */ + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct esw_mc_addr mc_promisc; + /* end of legacy */ + struct dentry *debugfs_root; + struct workqueue_struct *work_queue; + struct xarray vports; + u32 flags; + int total_vports; + int enabled_vports; + /* Synchronize between vport change events + * and async SRIOV admin state changes + */ + struct mutex state_lock; + + /* Protects eswitch mode change that occurs via one or more + * user commands, i.e. sriov state change, devlink commands. + */ + struct rw_semaphore mode_lock; + atomic64_t user_count; + + struct { + u32 root_tsar_ix; + struct mlx5_esw_rate_group *group0; + struct list_head groups; /* Protected by esw->state_lock */ + + /* Protected by esw->state_lock. + * Initially 0, meaning no QoS users and QoS is disabled. + */ + refcount_t refcnt; + } qos; + + struct mlx5_esw_bridge_offloads *br_offloads; + struct mlx5_esw_offload offloads; + int mode; + u16 manager_vport; + u16 first_host_vport; + u8 num_peers; + struct mlx5_esw_functions esw_funcs; + struct { + u32 large_group_num; + } params; + struct blocking_notifier_head n_head; + struct xarray paired; + struct mlx5_devcom_comp_dev *devcom; + u16 enabled_ipsec_vf_count; + bool eswitch_operation_in_progress; +}; + +void esw_offloads_disable(struct mlx5_eswitch *esw); +int esw_offloads_enable(struct mlx5_eswitch *esw); +void esw_offloads_cleanup(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw); + +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule); + +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); +u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); +void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); + +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); + +#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf); +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw); +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key); +void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw); +bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + u16 vport, const u8 *mac); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + u16 vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos); +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + u16 vport, bool spoofchk); +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + u16 vport_num, bool setting); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, + u32 max_rate, u32 min_rate); +int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack); +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + u16 vport, struct ifla_vf_info *ivi); +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + u16 vport, + struct ifla_vf_stats *vf_stats); +void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); + +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, void *in); + +struct mlx5_flow_spec; +struct mlx5_esw_flow_attr; +struct mlx5_termtbl_handle; + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest); + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt); + +void +mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +struct mlx5_flow_handle * +mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5_flow_handle * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, + struct mlx5_flow_destination *dest); + +enum { + SET_VLAN_STRIP = BIT(0), + SET_VLAN_INSERT = BIT(1) +}; + +enum mlx5_flow_match_level { + MLX5_MATCH_NONE = MLX5_INLINE_MODE_NONE, + MLX5_MATCH_L2 = MLX5_INLINE_MODE_L2, + MLX5_MATCH_L3 = MLX5_INLINE_MODE_IP, + MLX5_MATCH_L4 = MLX5_INLINE_MODE_TCP_UDP, +}; + +/* current maximum for flow based vport multicasting */ +#define MLX5_MAX_FLOW_FWD_VPORTS 32 + +enum { + MLX5_ESW_DEST_ENCAP = BIT(0), + MLX5_ESW_DEST_ENCAP_VALID = BIT(1), + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2), +}; + +struct mlx5_esw_flow_attr { + struct mlx5_eswitch_rep *in_rep; + struct mlx5_core_dev *in_mdev; + struct mlx5_core_dev *counter_dev; + struct mlx5e_tc_int_port *dest_int_port; + struct mlx5e_tc_int_port *int_port; + + int split_count; + int out_count; + + __be16 vlan_proto[MLX5_FS_VLAN_DEPTH]; + u16 vlan_vid[MLX5_FS_VLAN_DEPTH]; + u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; + u8 total_vlan; + struct { + u32 flags; + bool vport_valid; + u16 vport; + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_core_dev *mdev; + struct mlx5_termtbl_handle *termtbl; + int src_port_rewrite_act_id; + } dests[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5_rx_tun_attr *rx_tun_attr; + struct ethhdr eth; + struct mlx5_pkt_reformat *decap_pkt_reformat; +}; + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); +int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, + struct netlink_ext_ack *extack); +int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, + struct netlink_ext_ack *extack); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); +int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack); +#ifdef CONFIG_XFRM_OFFLOAD +int mlx5_devlink_port_fn_ipsec_crypto_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_ipsec_crypto_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack); +#endif /* CONFIG_XFRM_OFFLOAD */ +void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); + +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos, u8 set_flags); + +static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw) +{ + return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) && + MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan)); +} + +static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, + u8 vlan_depth) +{ + bool ret = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan); + + if (vlan_depth == 1) + return ret; + + return ret && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan_2) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); +} + +bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1); + +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(__dev, format, ...) \ + dev_info((__dev)->device, "E-Switch: " format, ##__VA_ARGS__) + +#define esw_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "E-Switch: " format, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw) +{ + return esw && MLX5_ESWITCH_MANAGER(esw->dev); +} + +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + +static inline bool +mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return esw->manager_vport == vport_num; +} + +static inline bool mlx5_esw_is_owner(struct mlx5_eswitch *esw, u16 vport_num, + u16 esw_owner_vhca_id) +{ + return esw_owner_vhca_id == MLX5_CAP_GEN(esw->dev, vhca_id) || + (vport_num == MLX5_VPORT_UPLINK && mlx5_lag_is_master(esw->dev)); +} + +static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; +} + +static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev); +} + +static inline unsigned int +mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, + u16 vport_num) +{ + return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num; +} + +static inline u16 +mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index) +{ + return dl_port_index & 0xffff; +} + +static inline bool mlx5_esw_is_fdb_created(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.flags & MLX5_ESW_FDB_CREATED; +} + +/* TODO: This mlx5e_tc function shouldn't be called by eswitch */ +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + +/* Each mark identifies eswitch vport type. + * MLX5_ESW_VPT_HOST_FN is used to identify both PF and VF ports using + * a single mark. + * MLX5_ESW_VPT_VF identifies a SRIOV VF vport. + * MLX5_ESW_VPT_SF identifies SF vport. + */ +#define MLX5_ESW_VPT_HOST_FN XA_MARK_0 +#define MLX5_ESW_VPT_VF XA_MARK_1 +#define MLX5_ESW_VPT_SF XA_MARK_2 + +/* The vport iterator is valid only after vport are initialized in mlx5_eswitch_init. + * Borrowed the idea from xa_for_each_marked() but with support for desired last element. + */ + +#define mlx5_esw_for_each_vport(esw, index, vport) \ + xa_for_each(&((esw)->vports), index, vport) + +#define mlx5_esw_for_each_entry_marked(xa, index, entry, last, filter) \ + for (index = 0, entry = xa_find(xa, &index, last, filter); \ + entry; entry = xa_find_after(xa, &index, last, filter)) + +#define mlx5_esw_for_each_vport_marked(esw, index, vport, last, filter) \ + mlx5_esw_for_each_entry_marked(&((esw)->vports), index, vport, last, filter) + +#define mlx5_esw_for_each_vf_vport(esw, index, vport, last) \ + mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_VF) + +#define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \ + mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN) + +/* This macro should only be used if EC SRIOV is enabled. + * + * Because there were no more marks available on the xarray this uses a + * for_each_range approach. The range is only valid when EC SRIOV is enabled + */ +#define mlx5_esw_for_each_ec_vf_vport(esw, index, vport, last) \ + xa_for_each_range(&((esw)->vports), \ + index, \ + vport, \ + MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base), \ + MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base) +\ + (last) - 1) + +struct mlx5_eswitch *__must_check +mlx5_devlink_eswitch_get(struct devlink *devlink); + +struct mlx5_eswitch *mlx5_devlink_eswitch_nocheck_get(struct devlink *devlink); + +struct mlx5_vport *__must_check +mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); + +bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_eswitch_is_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); + +int +mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); + +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int +esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void +esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); + +struct esw_vport_tbl_namespace { + int max_fte; + int max_num_groups; + u32 flags; +}; + +struct mlx5_vport_tbl_attr { + u32 chain; + u16 prio; + u16 vport; + struct esw_vport_tbl_namespace *vport_ns; +}; + +struct mlx5_flow_table * +mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr); +void +mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr); + +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); + +void mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in, + int match_params); + +void mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw, + u16 vport, + struct mlx5_flow_spec *spec); + +int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_offloads_init_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_devlink_port *dl_port, + u32 controller, u32 sfnum); +void mlx5_esw_offloads_cleanup_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_eswitch_load_sf_vport(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events, + struct mlx5_devlink_port *dl_port, u32 controller, u32 sfnum); +void mlx5_eswitch_unload_sf_vport(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); + +int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); + +int mlx5_esw_offloads_sf_devlink_port_init(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_devlink_port *dl_port, + u32 controller, u32 sfnum); +void mlx5_esw_offloads_sf_devlink_port_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id); + +int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num); + +/** + * mlx5_esw_event_info - Indicates eswitch mode changed/changing. + * + * @new_mode: New mode of eswitch. + */ +struct mlx5_esw_event_info { + u16 new_mode; +}; + +int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n); +void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n); + +bool mlx5_esw_hold(struct mlx5_core_dev *dev); +void mlx5_esw_release(struct mlx5_core_dev *dev); +void mlx5_esw_get(struct mlx5_core_dev *dev); +void mlx5_esw_put(struct mlx5_core_dev *dev); +int mlx5_esw_try_lock(struct mlx5_eswitch *esw); +int mlx5_esw_lock(struct mlx5_eswitch *esw); +void mlx5_esw_unlock(struct mlx5_eswitch *esw); + +void esw_vport_change_handle_locked(struct mlx5_vport *vport); + +bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller); + +int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw, int max_slaves); +void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw); +int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); + +bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev); +void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev); + +int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev); +void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev); + +static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->esw_funcs.num_vfs; + + return 0; +} + +static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->num_peers; + return 0; +} + +static inline struct mlx5_flow_table * +mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.offloads.slow_fdb; +} + +int mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *esw_attr, int attr_idx); +bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev); +void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev); +bool mlx5_esw_ipsec_vf_offload_supported(struct mlx5_core_dev *dev); +int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev, + struct mlx5_vport *vport); +int mlx5_esw_ipsec_vf_crypto_offload_supported(struct mlx5_core_dev *dev, + u16 vport_num); +int mlx5_esw_ipsec_vf_crypto_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable); +int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + bool enable); +int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, + u16 vport_num); +void mlx5_esw_vport_ipsec_offload_enable(struct mlx5_eswitch *esw); +void mlx5_esw_vport_ipsec_offload_disable(struct mlx5_eswitch *esw); + +#else /* CONFIG_MLX5_ESWITCH */ +/* eswitch API stubs */ +static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } +static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {} +static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} +static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) {} +static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {} +static inline bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw) { return false; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } +static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline unsigned int +mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, + u16 vport_num) +{ + return vport_num; +} + +static inline int +mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw, int max_slaves) +{ + return 0; +} + +static inline void +mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) {} + +static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) { return 0; } + +static inline int +mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +{ + return 0; +} + +static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) +{ + return true; +} + +static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) {} +static inline bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev) +{ + return false; +} + +static inline void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) {} +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c new file mode 100644 index 0000000000..b0455134c9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -0,0 +1,4577 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "eswitch.h" +#include "esw/indir_table.h" +#include "esw/acl/ofld.h" +#include "rdma.h" +#include "en.h" +#include "fs_core.h" +#include "lib/devcom.h" +#include "lib/eq.h" +#include "lib/fs_chains.h" +#include "en_tc.h" +#include "en/mapping.h" +#include "devlink.h" +#include "lag/lag.h" +#include "en/tc/post_meter.h" + +#define mlx5_esw_for_each_rep(esw, i, rep) \ + xa_for_each(&((esw)->offloads.vport_reps), i, rep) + +/* There are two match-all miss flows, one for unicast dst mac and + * one for multicast. + */ +#define MLX5_ESW_MISS_FLOWS (2) +#define UPLINK_REP_INDEX 0 + +#define MLX5_ESW_VPORT_TBL_SIZE 128 +#define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4 + +#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) + +static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { + .max_fte = MLX5_ESW_VPORT_TBL_SIZE, + .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, + .flags = 0, +}; + +static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, + u16 vport_num) +{ + return xa_load(&esw->offloads.vport_reps, vport_num); +} + +static void +mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) || !attr || !attr->in_rep) + return; + + if (attr->int_port) { + spec->flow_context.flow_source = mlx5e_tc_int_port_get_flow_source(attr->int_port); + + return; + } + + spec->flow_context.flow_source = (attr->in_rep->vport == MLX5_VPORT_UPLINK) ? + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK : + MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; +} + +/* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits + * are not needed as well in the following process. So clear them all for simplicity. + */ +void +mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec) +{ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + void *misc2; + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2))) + spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2; + } +} + +static void +mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct mlx5_eswitch *src_esw, + u16 vport) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + u32 metadata; + void *misc2; + void *misc; + + /* Use metadata matching because vport is not represented by single + * VHCA in dual-port RoCE mode, and matching on source vport may fail. + */ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + if (mlx5_esw_indir_table_decap_vport(attr)) + vport = mlx5_esw_indir_table_decap_vport(attr); + + if (!attr->chain && esw_attr && esw_attr->int_port) + metadata = + mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); + else + metadata = + mlx5_eswitch_get_vport_metadata_for_match(src_esw, vport); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, metadata); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(src_esw->dev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } +} + +static int +esw_setup_decap_indir(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_table *ft; + + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + ft = mlx5_esw_indir_table_get(esw, attr, + mlx5_esw_indir_table_decap_vport(attr), true); + return PTR_ERR_OR_ZERO(ft); +} + +static void +esw_cleanup_decap_indir(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + if (mlx5_esw_indir_table_decap_vport(attr)) + mlx5_esw_indir_table_put(esw, + mlx5_esw_indir_table_decap_vport(attr), + true); +} + +static int +esw_setup_mtu_dest(struct mlx5_flow_destination *dest, + struct mlx5e_meter_attr *meter, + int i) +{ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_RANGE; + dest[i].range.field = MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN; + dest[i].range.min = 0; + dest[i].range.max = meter->params.mtu; + dest[i].range.hit_ft = mlx5e_post_meter_get_mtu_true_ft(meter->post_meter); + dest[i].range.miss_ft = mlx5e_post_meter_get_mtu_false_ft(meter->post_meter); + + return 0; +} + +static int +esw_setup_sampler_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + u32 sampler_id, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + dest[i].sampler_id = sampler_id; + + return 0; +} + +static int +esw_setup_ft_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = attr->dest_ft; + + if (mlx5_esw_indir_table_decap_vport(attr)) + return esw_setup_decap_indir(esw, attr); + return 0; +} + +static void +esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, int i) +{ + if (mlx5_chains_ignore_flow_level_supported(chains)) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = mlx5_chains_get_tc_end_ft(chains); +} + +static void +esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, int i) +{ + if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = mlx5_eswitch_get_slow_fdb(esw); +} + +static int +esw_setup_chain_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level, + int i) +{ + struct mlx5_flow_table *ft; + + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + ft = mlx5_chains_get_table(chains, chain, prio, level); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = ft; + return 0; +} + +static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, + int from, int to) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + int i; + + for (i = from; i < to; i++) + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + mlx5_chains_put_table(chains, 0, 1, 0); + else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport, + esw_attr->dests[i].mdev)) + mlx5_esw_indir_table_put(esw, esw_attr->dests[i].vport, false); +} + +static bool +esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr) +{ + int i; + + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + return true; + return false; +} + +static int +esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_fs_chains *chains, + struct mlx5_flow_attr *attr, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int err; + + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + /* flow steering cannot handle more than one dest with the same ft + * in a single flow + */ + if (esw_attr->out_count - esw_attr->split_count > 1) + return -EOPNOTSUPP; + + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); + if (err) + return err; + + if (esw_attr->dests[esw_attr->split_count].pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat; + } + (*i)++; + + return 0; +} + +static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count); +} + +static bool +esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + bool result = false; + int i; + + /* Indirect table is supported only for flows with in_port uplink + * and the destination is vport on the same eswitch as the uplink, + * return false in case at least one of destinations doesn't meet + * this criteria. + */ + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { + if (esw_attr->dests[i].vport_valid && + mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport, + esw_attr->dests[i].mdev)) { + result = true; + } else { + result = false; + break; + } + } + return result; +} + +static int +esw_setup_indir_table(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int j, err; + + if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, + esw_attr->dests[j].vport, false); + if (IS_ERR(dest[*i].ft)) { + err = PTR_ERR(dest[*i].ft); + goto err_indir_tbl_get; + } + } + + if (mlx5_esw_indir_table_decap_vport(attr)) { + err = esw_setup_decap_indir(esw, attr); + if (err) + goto err_indir_tbl_get; + } + + return 0; + +err_indir_tbl_get: + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); + return err; +} + +static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count); + esw_cleanup_decap_indir(esw, attr); +} + +static void +esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level) +{ + mlx5_chains_put_table(chains, chain, prio, level); +} + +static bool esw_same_vhca_id(struct mlx5_core_dev *mdev1, struct mlx5_core_dev *mdev2) +{ + return MLX5_CAP_GEN(mdev1, vhca_id) == MLX5_CAP_GEN(mdev2, vhca_id); +} + +static bool esw_setup_uplink_fwd_ipsec_needed(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *esw_attr, + int attr_idx) +{ + if (esw->offloads.ft_ipsec_tx_pol && + esw_attr->dests[attr_idx].vport_valid && + esw_attr->dests[attr_idx].vport == MLX5_VPORT_UPLINK && + /* To be aligned with software, encryption is needed only for tunnel device */ + (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) && + esw_attr->dests[attr_idx].vport != esw_attr->in_rep->vport && + esw_same_vhca_id(esw_attr->dests[attr_idx].mdev, esw->dev)) + return true; + + return false; +} + +static bool esw_flow_dests_fwd_ipsec_check(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *esw_attr) +{ + int i; + + if (!esw->offloads.ft_ipsec_tx_pol) + return true; + + for (i = 0; i < esw_attr->split_count; i++) + if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, i)) + return false; + + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, i) && + (esw_attr->out_count - esw_attr->split_count > 1)) + return false; + + return true; +} + +static void +esw_setup_dest_fwd_vport(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) +{ + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[dest_idx].vport.num = esw_attr->dests[attr_idx].vport; + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK && + mlx5_lag_is_mpesw(esw->dev)) + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + } + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { + if (pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } +} + +static void +esw_setup_dest_fwd_ipsec(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) +{ + dest[dest_idx].ft = esw->offloads.ft_ipsec_tx_pol; + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (pkt_reformat && + esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } +} + +static void +esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) +{ + if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, attr_idx)) + esw_setup_dest_fwd_ipsec(dest, flow_act, esw, esw_attr, + attr_idx, dest_idx, pkt_reformat); + else + esw_setup_dest_fwd_vport(dest, flow_act, esw, esw_attr, + attr_idx, dest_idx, pkt_reformat); +} + +static int +esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int i) +{ + int j; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++) + esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true); + return i; +} + +static bool +esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_GEN(esw->dev, reg_c_preserve) && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level); +} + +static bool +esw_dests_to_vf_pf_vports(struct mlx5_flow_destination *dests, int max_dest) +{ + bool vf_dest = false, pf_dest = false; + int i; + + for (i = 0; i < max_dest; i++) { + if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + if (dests[i].vport.num == MLX5_VPORT_UPLINK) + pf_dest = true; + else + vf_dest = true; + + if (vf_dest && pf_dest) + return true; + } + + return false; +} + +static int +esw_setup_dests(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + int err = 0; + + if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && + esw_src_port_rewrite_supported(esw)) + attr->flags |= MLX5_ATTR_FLAG_SRC_REWRITE; + + if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) { + esw_setup_slow_path_dest(dest, flow_act, esw, *i); + (*i)++; + goto out; + } + + if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) { + esw_setup_sampler_dest(dest, flow_act, attr->sample_attr.sampler_id, *i); + (*i)++; + } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) { + esw_setup_accept_dest(dest, flow_act, chains, *i); + (*i)++; + } else if (attr->flags & MLX5_ATTR_FLAG_MTU) { + err = esw_setup_mtu_dest(dest, &attr->meter_attr, *i); + (*i)++; + } else if (esw_is_indir_table(esw, attr)) { + err = esw_setup_indir_table(dest, flow_act, esw, attr, i); + } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) { + err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i); + } else { + *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i); + + if (attr->dest_ft) { + err = esw_setup_ft_dest(dest, flow_act, esw, attr, *i); + (*i)++; + } else if (attr->dest_chain) { + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, + 1, 0, *i); + (*i)++; + } + } + +out: + return err; +} + +static void +esw_cleanup_dests(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + + if (attr->dest_ft) { + esw_cleanup_decap_indir(esw, attr); + } else if (!mlx5e_tc_attr_flags_skip(attr->flags)) { + if (attr->dest_chain) + esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0); + else if (esw_is_indir_table(esw, attr)) + esw_cleanup_indir_table(esw, attr); + else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) + esw_cleanup_chain_src_port_rewrite(esw, attr); + } +} + +static void +esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act) +{ + struct mlx5e_flow_meter_handle *meter; + + meter = attr->meter_attr.meter; + flow_act->exe_aso.type = attr->exe_aso_type; + flow_act->exe_aso.object_id = meter->obj_id; + flow_act->exe_aso.flow_meter.meter_idx = meter->idx; + flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN; + /* use metadata reg 5 for packet color */ + flow_act->exe_aso.return_reg_id = 5; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + bool split = !!(esw_attr->split_count); + struct mlx5_vport_tbl_attr fwd_attr; + struct mlx5_flow_destination *dest; + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *fdb; + int i = 0; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return ERR_PTR(-EOPNOTSUPP); + + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) + return ERR_PTR(-EOPNOTSUPP); + + if (!esw_flow_dests_fwd_ipsec_check(esw, esw_attr)) + return ERR_PTR(-EOPNOTSUPP); + + dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL); + if (!dest) + return ERR_PTR(-ENOMEM); + + flow_act.action = attr->action; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]); + flow_act.vlan[0].vid = esw_attr->vlan_vid[0]; + flow_act.vlan[0].prio = esw_attr->vlan_prio[0]; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + flow_act.vlan[1].ethtype = ntohs(esw_attr->vlan_proto[1]); + flow_act.vlan[1].vid = esw_attr->vlan_vid[1]; + flow_act.vlan[1].prio = esw_attr->vlan_prio[1]; + } + } + + mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr); + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int err; + + err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i); + if (err) { + rule = ERR_PTR(err); + goto err_create_goto_table; + } + + /* Header rewrite with combined wire+loopback in FDB is not allowed */ + if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) && + esw_dests_to_vf_pf_vports(dest, i)) { + esw_warn(esw->dev, + "FDB: Header rewrite with forwarding to both PF and VF is not allowed\n"); + rule = ERR_PTR(-EINVAL); + goto err_esw_get; + } + } + + if (esw_attr->decap_pkt_reformat) + flow_act.pkt_reformat = esw_attr->decap_pkt_reformat; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[i].counter_id = mlx5_fc_id(attr->counter); + i++; + } + + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + if (attr->inner_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_hdr = attr->modify_hdr; + + if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && + attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER) + esw_setup_meter(attr, &flow_act); + + if (split) { + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + + fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); + } else { + if (attr->chain || attr->prio) + fdb = mlx5_chains_get_table(chains, attr->chain, + attr->prio, 0); + else + fdb = attr->ft; + + if (!(attr->flags & MLX5_ATTR_FLAG_NO_IN_PORT)) + mlx5_eswitch_set_rule_source_port(esw, spec, attr, + esw_attr->in_mdev->priv.eswitch, + esw_attr->in_rep->vport); + } + if (IS_ERR(fdb)) { + rule = ERR_CAST(fdb); + goto err_esw_get; + } + + if (!i) { + kfree(dest); + dest = NULL; + } + + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr, + &flow_act, dest, i); + else + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + if (IS_ERR(rule)) + goto err_add_rule; + else + atomic64_inc(&esw->offloads.num_flows); + + kfree(dest); + return rule; + +err_add_rule: + if (split) + mlx5_esw_vporttbl_put(esw, &fwd_attr); + else if (attr->chain || attr->prio) + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); +err_esw_get: + esw_cleanup_dests(esw, attr); +err_create_goto_table: + kfree(dest); + return rule; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5_vport_tbl_attr fwd_attr; + struct mlx5_flow_destination *dest; + struct mlx5_flow_table *fast_fdb; + struct mlx5_flow_table *fwd_fdb; + struct mlx5_flow_handle *rule; + int i, err = 0; + + dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL); + if (!dest) + return ERR_PTR(-ENOMEM); + + fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0); + if (IS_ERR(fast_fdb)) { + rule = ERR_CAST(fast_fdb); + goto err_get_fast; + } + + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); + if (IS_ERR(fwd_fdb)) { + rule = ERR_CAST(fwd_fdb); + goto err_get_fwd; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + for (i = 0; i < esw_attr->split_count; i++) { + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + /* Source port rewrite (forward to ovs internal port or statck device) isn't + * supported in the rule of split action. + */ + err = -EOPNOTSUPP; + else + esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false); + + if (err) { + rule = ERR_PTR(err); + goto err_chain_src_rewrite; + } + } + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = fwd_fdb; + i++; + + mlx5_eswitch_set_rule_source_port(esw, spec, attr, + esw_attr->in_mdev->priv.eswitch, + esw_attr->in_rep->vport); + + if (attr->outer_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); + + if (IS_ERR(rule)) { + i = esw_attr->split_count; + goto err_chain_src_rewrite; + } + + atomic64_inc(&esw->offloads.num_flows); + + kfree(dest); + return rule; +err_chain_src_rewrite: + mlx5_esw_vporttbl_put(esw, &fwd_attr); +err_get_fwd: + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); +err_get_fast: + kfree(dest); + return rule; +} + +static void +__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr, + bool fwd_rule) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + bool split = (esw_attr->split_count > 0); + struct mlx5_vport_tbl_attr fwd_attr; + int i; + + mlx5_del_flow_rules(rule); + + if (!mlx5e_tc_attr_flags_skip(attr->flags)) { + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (esw_attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, esw_attr->dests[i].termtbl); + } + } + + atomic64_dec(&esw->offloads.num_flows); + + if (fwd_rule || split) { + fwd_attr.chain = attr->chain; + fwd_attr.prio = attr->prio; + fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + } + + if (fwd_rule) { + mlx5_esw_vporttbl_put(esw, &fwd_attr); + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); + } else { + if (split) + mlx5_esw_vporttbl_put(esw, &fwd_attr); + else if (attr->chain || attr->prio) + mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); + esw_cleanup_dests(esw, attr); + } +} + +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + __mlx5_eswitch_del_rule(esw, rule, attr, false); +} + +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + __mlx5_eswitch_del_rule(esw, rule, attr, true); +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch *from_esw, + struct mlx5_eswitch_rep *rep, + u32 sqn) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + u16 vport; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + /* source vport is the esw manager */ + vport = from_esw->manager_vport; + + if (mlx5_eswitch_vport_match_metadata_enabled(on_esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(from_esw, vport)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(from_esw->dev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = rep->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + if (rep->vport == MLX5_VPORT_UPLINK && + on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) { + dest.ft = on_esw->offloads.ft_ipsec_tx_pol; + flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + } else { + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = rep->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + } + + if (MLX5_CAP_ESW_FLOWTABLE(on_esw->dev, flow_source) && + rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + + flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(on_esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) + esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n", + PTR_ERR(flow_rule)); +out: + kvfree(spec); + return flow_rule; +} +EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule); + +void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +void mlx5_eswitch_del_send_to_vport_meta_rule(struct mlx5_flow_handle *rule) +{ + if (rule) + mlx5_del_flow_rules(rule); +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1, + ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + dest.vport.num = vport_num; + + flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n", + vport_num, PTR_ERR(flow_rule)); + + kvfree(spec); + return flow_rule; +} + +static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_1; +} + +static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 min[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + u8 curr, wanted; + int err; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw) && + !mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + err = mlx5_cmd_exec_inout(esw->dev, query_esw_vport_context, in, out); + if (err) + return err; + + curr = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + wanted = MLX5_FDB_TO_VPORT_REG_C_0; + if (mlx5_eswitch_reg_c1_loopback_supported(esw)) + wanted |= MLX5_FDB_TO_VPORT_REG_C_1; + + if (enable) + curr |= wanted; + else + curr &= ~wanted; + + MLX5_SET(modify_esw_vport_context_in, min, + esw_vport_context.fdb_to_vport_reg_c_id, curr); + MLX5_SET(modify_esw_vport_context_in, min, + field_select.fdb_to_vport_reg_c_id, 1); + + err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, min); + if (!err) { + if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1)) + esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + else + esw->flags &= ~MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED; + } + + return err; +} + +static void peer_miss_rules_setup(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev, + struct mlx5_flow_spec *spec, + struct mlx5_flow_destination *dest) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + } + + dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest->vport.num = peer_dev->priv.eswitch->manager_vport; + dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); + dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; +} + +static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + struct mlx5_flow_spec *spec, + u16 vport) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(peer_esw, + vport)); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + } +} + +static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle **flows; + /* total vports is the same for both e-switches */ + int nvports = esw->total_vports; + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + struct mlx5_vport *vport; + int err, pfindex; + unsigned long i; + void *misc; + + if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + peer_miss_rules_setup(esw, peer_dev, spec, &dest); + + flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL); + if (!flows) { + err = -ENOMEM; + goto alloc_flows_err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, MLX5_VPORT_PF); + + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_pf_flow_err; + } + flows[vport->index] = flow; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } + flows[vport->index] = flow; + } + + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { + esw_set_peer_miss_rule_source_port(esw, + peer_dev->priv.eswitch, + spec, vport->vport); + + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_vf_flow_err; + } + flows[vport->index] = flow; + } + + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (i >= mlx5_core_max_ec_vfs(peer_dev)) + break; + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, vport->vport); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ec_vf_flow_err; + } + flows[vport->index] = flow; + } + } + + pfindex = mlx5_get_dev_index(peer_dev); + if (pfindex >= MLX5_MAX_PORTS) { + esw_warn(esw->dev, "Peer dev index(%d) is over the max num defined(%d)\n", + pfindex, MLX5_MAX_PORTS); + err = -EINVAL; + goto add_ec_vf_flow_err; + } + esw->fdb_table.offloads.peer_miss_rules[pfindex] = flows; + + kvfree(spec); + return 0; + +add_ec_vf_flow_err: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } +add_vf_flow_err: + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[vport->index]); + } +add_ecpf_flow_err: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[vport->index]); + } +add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); + kvfree(flows); +alloc_flows_err: + kvfree(spec); + return err; +} + +static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) +{ + u16 peer_index = mlx5_get_dev_index(peer_dev); + struct mlx5_flow_handle **flows; + struct mlx5_vport *vport; + unsigned long i; + + flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; + if (!flows) + return; + + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + /* The flow for a particular vport could be NULL if the other ECPF + * has fewer or no VFs enabled + */ + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } + } + + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) + mlx5_del_flow_rules(flows[vport->index]); + + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[vport->index]); + } + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[vport->index]); + } + + kvfree(flows); + esw->fdb_table.offloads.peer_miss_rules[peer_index] = NULL; +} + +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_spec *spec; + void *headers_c; + void *headers_v; + int err = 0; + u8 *dmac_c; + u8 *dmac_v; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto out; + } + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, + outer_headers.dmac_47_16); + dmac_c[0] = 0x01; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = esw->manager_vport; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule_uni = flow_rule; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, + outer_headers.dmac_47_16); + dmac_v[0] = 0x01; + flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + goto out; + } + + esw->fdb_table.offloads.miss_rule_multi = flow_rule; + +out: + kvfree(spec); + return err; +} + +struct mlx5_flow_handle * +esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) +{ + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *ft = esw->offloads.ft_offloads_restore; + struct mlx5_flow_context *flow_context; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + void *misc; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return ERR_PTR(-EOPNOTSUPP); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_REG_C0_USER_DATA_METADATA_MASK); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = esw->offloads.restore_copy_hdr_id; + + flow_context = &spec->flow_context; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = tag; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->offloads.ft_offloads; + + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, + "Failed to create restore rule for tag: %d, err(%d)\n", + tag, (int)PTR_ERR(flow_rule)); + + return flow_rule; +} + +#define MAX_PF_SQ 256 +#define MAX_SQ_NVPORTS 32 + +void +mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in, + int match_params) +{ + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2 | match_params); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + } else { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS | match_params); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + } +} + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +static void esw_vport_tbl_put(struct mlx5_eswitch *esw) +{ + struct mlx5_vport_tbl_attr attr; + struct mlx5_vport *vport; + unsigned long i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_each_vport(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + mlx5_esw_vporttbl_put(esw, &attr); + } +} + +static int esw_vport_tbl_get(struct mlx5_eswitch *esw) +{ + struct mlx5_vport_tbl_attr attr; + struct mlx5_flow_table *fdb; + struct mlx5_vport *vport; + unsigned long i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_each_vport(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fdb = mlx5_esw_vporttbl_get(esw, &attr); + if (IS_ERR(fdb)) + goto out; + } + return 0; + +out: + esw_vport_tbl_put(esw); + return PTR_ERR(fdb); +} + +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) +static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags) +{ + struct mlx5_core_dev *dev = esw->dev; + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ignore_flow_level)) + *flags |= MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; + + if (!MLX5_CAP_ESW_FLOWTABLE(dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + *flags &= ~MLX5_CHAINS_AND_PRIOS_SUPPORTED; + } else { + *flags |= MLX5_CHAINS_AND_PRIOS_SUPPORTED; + esw_info(dev, "Supported tc chains and prios offload\n"); + } + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + *flags |= MLX5_CHAINS_FT_TUNNEL_SUPPORTED; +} + +static int +esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *nf_ft, *ft; + struct mlx5_chains_attr attr = {}; + struct mlx5_fs_chains *chains; + int err; + + esw_init_chains_offload_flags(esw, &attr.flags); + attr.ns = MLX5_FLOW_NAMESPACE_FDB; + attr.max_grp_num = esw->params.large_group_num; + attr.default_ft = miss_fdb; + attr.mapping = esw->offloads.reg_c0_obj_pool; + + chains = mlx5_chains_create(dev, &attr); + if (IS_ERR(chains)) { + err = PTR_ERR(chains); + esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + return err; + } + mlx5_chains_print_info(chains); + + esw->fdb_table.offloads.esw_chains_priv = chains; + + /* Create tc_end_ft which is the always created ft chain */ + nf_ft = mlx5_chains_get_table(chains, mlx5_chains_get_nf_ft_chain(chains), + 1, 0); + if (IS_ERR(nf_ft)) { + err = PTR_ERR(nf_ft); + goto nf_ft_err; + } + + /* Always open the root for fast path */ + ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_0_err; + } + + /* Open level 1 for split fdb rules now if prios isn't supported */ + if (!mlx5_chains_prios_supported(chains)) { + err = esw_vport_tbl_get(esw); + if (err) + goto level_1_err; + } + + mlx5_chains_set_end_ft(chains, nf_ft); + + return 0; + +level_1_err: + mlx5_chains_put_table(chains, 0, 1, 0); +level_0_err: + mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); +nf_ft_err: + mlx5_chains_destroy(chains); + esw->fdb_table.offloads.esw_chains_priv = NULL; + + return err; +} + +static void +esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + esw_vport_tbl_put(esw); + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); + mlx5_chains_destroy(chains); +} + +#else /* CONFIG_MLX5_CLS_ACT */ + +static int +esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) +{ return 0; } + +static void +esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) +{} + +#endif + +static int +esw_create_send_to_vport_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int count, err = 0; + + memset(flow_group_in, 0, inlen); + + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } + + /* See comment at table_size calculation */ + count = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, *ix + count - 1); + *ix += count; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create send-to-vport flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.offloads.send_to_vport_grp = g; + +out: + return err; +} + +static int +esw_create_meta_send_to_vport_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + + if (!esw_src_port_rewrite_supported(esw)) + return 0; + + memset(flow_group_in, 0, inlen); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, *ix + esw->total_vports - 1); + *ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, + "Failed to create send-to-vport meta flow group err(%d)\n", err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + return 0; + +send_vport_meta_err: + return err; +} + +static int +esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int max_peer_ports = (esw->total_vports - 1) * (MLX5_MAX_PORTS - 1); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return 0; + + memset(flow_group_in, 0, inlen); + + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + *ix + max_peer_ports); + *ix += max_peer_ports + 1; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create peer miss flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.offloads.peer_miss_grp = g; + +out: + return err; +} + +static int +esw_create_miss_group(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + u32 *flow_group_in, + int *ix) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + void *match_criteria; + int err = 0; + u8 *dmac; + + memset(flow_group_in, 0, inlen); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + *ix + MLX5_ESW_MISS_FLOWS); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(esw->dev, "Failed to create miss flow group err(%d)\n", err); + goto miss_err; + } + esw->fdb_table.offloads.miss_grp = g; + + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + + return 0; + +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); +miss_err: + return err; +} + +static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + int table_size, ix = 0, err = 0; + u32 flags = 0, *flow_group_in; + + esw_debug(esw->dev, "Create offloads FDB Tables\n"); + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; + goto ns_err; + } + esw->fdb_table.offloads.ns = root_ns; + err = mlx5_flow_namespace_set_mode(root_ns, + esw->dev->priv.steering->mode); + if (err) { + esw_warn(dev, "Failed to set FDB namespace steering mode\n"); + goto ns_err; + } + + /* To be strictly correct: + * MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + * should be: + * esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + + * peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + * but as the peer device might not be in switchdev mode it's not + * possible. We use the fact that by default FW sets max vfs and max sfs + * to the same value on both devices. If it needs to be changed in the future note + * the peer miss group should also be created based on the number of + * total vports of the peer (currently is also uses esw->total_vports). + */ + table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + + esw->total_vports * MLX5_MAX_PORTS + MLX5_ESW_MISS_FLOWS; + + /* create the slow path fdb with encap set, so further table instances + * can be created at run time while VFs are probed if the FW allows that. + */ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + ft_attr.flags = flags; + ft_attr.max_fte = table_size; + ft_attr.prio = FDB_SLOW_PATH; + + fdb = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); + goto slow_fdb_err; + } + esw->fdb_table.offloads.slow_fdb = fdb; + + /* Create empty TC-miss managed table. This allows plugging in following + * priorities without directly exposing their level 0 table to + * eswitch_offloads and passing it as miss_fdb to following call to + * esw_chains_create(). + */ + memset(&ft_attr, 0, sizeof(ft_attr)); + ft_attr.prio = FDB_TC_MISS; + esw->fdb_table.offloads.tc_miss_table = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(esw->fdb_table.offloads.tc_miss_table)) { + err = PTR_ERR(esw->fdb_table.offloads.tc_miss_table); + esw_warn(dev, "Failed to create TC miss FDB Table err %d\n", err); + goto tc_miss_table_err; + } + + err = esw_chains_create(esw, esw->fdb_table.offloads.tc_miss_table); + if (err) { + esw_warn(dev, "Failed to open fdb chains err(%d)\n", err); + goto fdb_chains_err; + } + + err = esw_create_send_to_vport_group(esw, fdb, flow_group_in, &ix); + if (err) + goto send_vport_err; + + err = esw_create_meta_send_to_vport_group(esw, fdb, flow_group_in, &ix); + if (err) + goto send_vport_meta_err; + + err = esw_create_peer_esw_miss_group(esw, fdb, flow_group_in, &ix); + if (err) + goto peer_miss_err; + + err = esw_create_miss_group(esw, fdb, flow_group_in, &ix); + if (err) + goto miss_err; + + kvfree(flow_group_in); + return 0; + +miss_err: + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); +peer_miss_err: + if (esw->fdb_table.offloads.send_to_vport_meta_grp) + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); +send_vport_meta_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); +send_vport_err: + esw_chains_destroy(esw, esw_chains(esw)); +fdb_chains_err: + mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table); +tc_miss_table_err: + mlx5_destroy_flow_table(mlx5_eswitch_get_slow_fdb(esw)); +slow_fdb_err: + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(root_ns, MLX5_FLOW_STEERING_MODE_DMFS); +ns_err: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) +{ + if (!mlx5_eswitch_get_slow_fdb(esw)) + return; + + esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + if (esw->fdb_table.offloads.send_to_vport_meta_grp) + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + + esw_chains_destroy(esw, esw_chains(esw)); + + mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table); + mlx5_destroy_flow_table(mlx5_eswitch_get_slow_fdb(esw)); + /* Holds true only as long as DMFS is the default */ + mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, + MLX5_FLOW_STEERING_MODE_DMFS); + atomic64_set(&esw->user_count, 0); +} + +static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw) +{ + int nvports; + + nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS; + if (mlx5e_tc_int_port_supported(esw)) + nvports += MLX5E_TC_MAX_INT_PORT_NUM; + + return nvports; +} + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_namespace *ns; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.max_fte = esw_get_nr_ft_offloads_steering_src_ports(esw) + + MLX5_ESW_FT_OFFLOADS_DROP_RULE; + ft_attr.prio = 1; + + ft_offloads = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + u32 *flow_group_in; + int nvports; + int err = 0; + + nvports = esw_get_nr_ft_offloads_steering_src_ports(esw); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +static int esw_create_vport_rx_drop_rule_index(struct mlx5_eswitch *esw) +{ + /* ft_offloads table is enlarged by MLX5_ESW_FT_OFFLOADS_DROP_RULE (1) + * for the drop rule, which is placed at the end of the table. + * So return the total of vport and int_port as rule index. + */ + return esw_get_nr_ft_offloads_steering_src_ports(esw); +} + +static int esw_create_vport_rx_drop_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + u32 *flow_group_in; + int flow_index; + int err = 0; + + flow_index = esw_create_vport_rx_drop_rule_index(esw); + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx drop group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_drop_group = g; +out: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw) +{ + if (esw->offloads.vport_rx_drop_group) + mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group); +} + +void +mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw, + u16 vport, + struct mlx5_flow_spec *spec) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + } +} + +struct mlx5_flow_handle * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + mlx5_esw_set_spec_source_port(esw, vport, spec); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, + &flow_act, dest, 1); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); + goto out; + } + +out: + kvfree(spec); + return flow_rule; +} + +static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, NULL, + &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, + "fs offloads: Failed to add vport rx drop rule err %ld\n", + PTR_ERR(flow_rule)); + return PTR_ERR(flow_rule); + } + + esw->offloads.vport_rx_drop_rule = flow_rule; + + return 0; +} + +static void esw_destroy_vport_rx_drop_rule(struct mlx5_eswitch *esw) +{ + if (esw->offloads.vport_rx_drop_rule) + mlx5_del_flow_rules(esw->offloads.vport_rx_drop_rule); +} + +static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) +{ + u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_vport *vport; + unsigned long i; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (!mlx5_esw_is_fdb_created(esw)) + return -EOPNOTSUPP; + + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + mlx5_mode = MLX5_INLINE_MODE_NONE; + goto out; + case MLX5_CAP_INLINE_MODE_L2: + mlx5_mode = MLX5_INLINE_MODE_L2; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + goto query_vports; + } + +query_vports: + mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode); + if (prev_mlx5_mode != mlx5_mode) + return -EINVAL; + prev_mlx5_mode = mlx5_mode; + } + +out: + *mode = mlx5_mode; + return 0; +} + +static void esw_destroy_restore_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return; + + mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id); + mlx5_destroy_flow_group(offloads->restore_group); + mlx5_destroy_flow_table(offloads->ft_offloads_restore); +} + +static int esw_create_restore_table(struct mlx5_eswitch *esw) +{ + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_modify_hdr *mod_hdr; + void *match_criteria, *misc; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_free; + } + + ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + esw_warn(esw->dev, "Failed to create restore table, err %d\n", + err); + goto out_free; + } + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters_2); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + ESW_REG_C0_USER_DATA_METADATA_MASK); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft_attr.max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create restore flow group, err: %d\n", + err); + goto err_group; + } + + MLX5_SET(copy_action_in, modact, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, modact, src_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_C_1); + MLX5_SET(copy_action_in, modact, dst_field, + MLX5_ACTION_IN_FIELD_METADATA_REG_B); + mod_hdr = mlx5_modify_header_alloc(esw->dev, + MLX5_FLOW_NAMESPACE_KERNEL, 1, + modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + esw_warn(dev, "Failed to create restore mod header, err: %d\n", + err); + goto err_mod_hdr; + } + + esw->offloads.ft_offloads_restore = ft; + esw->offloads.restore_group = g; + esw->offloads.restore_copy_hdr_id = mod_hdr; + + kvfree(flow_group_in); + + return 0; + +err_mod_hdr: + mlx5_destroy_flow_group(g); +err_group: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + + return err; +} + +static int esw_offloads_start(struct mlx5_eswitch *esw, + struct netlink_ext_ack *extack) +{ + int err; + + esw->mode = MLX5_ESWITCH_OFFLOADS; + err = mlx5_eswitch_enable_locked(esw, esw->dev->priv.sriov.num_vfs); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed setting eswitch to offloads"); + esw->mode = MLX5_ESWITCH_LEGACY; + mlx5_rescan_drivers(esw->dev); + return err; + } + if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { + if (mlx5_eswitch_inline_mode_get(esw, + &esw->offloads.inline_mode)) { + esw->offloads.inline_mode = MLX5_INLINE_MODE_L2; + NL_SET_ERR_MSG_MOD(extack, + "Inline mode is different between vports"); + } + } + return 0; +} + +static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) +{ + struct mlx5_eswitch_rep *rep; + int rep_type; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + rep->vport = vport->vport; + rep->vport_index = vport->index; + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); + + err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL); + if (err) + goto insert_err; + + return 0; + +insert_err: + kfree(rep); + return err; +} + +static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + xa_erase(&esw->offloads.vport_reps, rep->vport); + kfree(rep); +} + +static void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + + mlx5_esw_for_each_rep(esw, i, rep) + mlx5_esw_offloads_rep_cleanup(esw, rep); + xa_destroy(&esw->offloads.vport_reps); +} + +static int esw_offloads_init_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + xa_init(&esw->offloads.vport_reps); + + mlx5_esw_for_each_vport(esw, i, vport) { + err = mlx5_esw_offloads_rep_init(esw, vport); + if (err) + goto err; + } + return 0; + +err: + esw_offloads_cleanup_reps(esw); + return err; +} + +static int esw_port_metadata_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err = 0; + + down_write(&esw->mode_lock); + if (mlx5_esw_is_fdb_created(esw)) { + err = -EBUSY; + goto done; + } + if (!mlx5_esw_vport_match_metadata_supported(esw)) { + err = -EOPNOTSUPP; + goto done; + } + if (ctx->val.vbool) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + else + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; +done: + up_write(&esw->mode_lock); + return err; +} + +static int esw_port_metadata_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch); + return 0; +} + +static int esw_port_metadata_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 esw_mode; + + esw_mode = mlx5_eswitch_mode(dev); + if (esw_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch must either disabled or non switchdev mode"); + return -EBUSY; + } + return 0; +} + +static const struct devlink_param esw_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + esw_port_metadata_get, + esw_port_metadata_set, + esw_port_metadata_validate), +}; + +int esw_offloads_init(struct mlx5_eswitch *esw) +{ + int err; + + err = esw_offloads_init_reps(esw); + if (err) + return err; + + err = devl_params_register(priv_to_devlink(esw->dev), + esw_devlink_params, + ARRAY_SIZE(esw_devlink_params)); + if (err) + goto err_params; + + return 0; + +err_params: + esw_offloads_cleanup_reps(esw); + return err; +} + +void esw_offloads_cleanup(struct mlx5_eswitch *esw) +{ + devl_params_unregister(priv_to_devlink(esw->dev), + esw_devlink_params, + ARRAY_SIZE(esw_devlink_params)); + esw_offloads_cleanup_reps(esw); +} + +static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_LOADED, REP_REGISTERED) == REP_LOADED) + esw->offloads.rep_ops[rep_type]->unload(rep); +} + +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + + mlx5_esw_for_each_rep(esw, i, rep) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_eswitch_rep *rep; + int rep_type; + int err; + + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); + if (err) + goto err_reps; + } + + return 0; + +err_reps: + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); + for (--rep_type; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); + return err; +} + +static void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_eswitch_rep *rep; + int rep_type; + + rep = mlx5_eswitch_get_rep(esw, vport_num); + for (rep_type = NUM_REP_TYPES - 1; rep_type >= 0; rep_type--) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + return mlx5_esw_offloads_pf_vf_devlink_port_init(esw, vport); +} + +void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport); +} + +int mlx5_esw_offloads_init_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + struct mlx5_devlink_port *dl_port, + u32 controller, u32 sfnum) +{ + return mlx5_esw_offloads_sf_devlink_port_init(esw, vport, dl_port, controller, sfnum); +} + +void mlx5_esw_offloads_cleanup_sf_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + mlx5_esw_offloads_sf_devlink_port_cleanup(esw, vport); +} + +int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int err; + + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + err = mlx5_esw_offloads_devlink_port_register(esw, vport); + if (err) + return err; + + err = mlx5_esw_offloads_rep_load(esw, vport->vport); + if (err) + goto load_err; + return err; + +load_err: + mlx5_esw_offloads_devlink_port_unregister(esw, vport); + return err; +} + +void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (esw->mode != MLX5_ESWITCH_OFFLOADS) + return; + + mlx5_esw_offloads_rep_unload(esw, vport->vport); + + mlx5_esw_offloads_devlink_port_unregister(esw, vport); +} + +static int esw_set_slave_root_fdb(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + int err; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, + FS_FT_FDB); + + if (master) { + ns = mlx5_get_flow_namespace(master, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } else { + ns = mlx5_get_flow_namespace(slave, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } + + err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); + mutex_unlock(&root->chain_lock); + + return err; +} + +static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, + struct mlx5_vport *vport, + struct mlx5_flow_table *acl) +{ + u16 slave_index = MLX5_CAP_GEN(slave, vhca_id); + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, slave_index); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = slave->priv.eswitch->manager_vport; + dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + + flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act, + &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + } else { + err = xa_insert(&vport->egress.offloads.bounce_rules, + slave_index, flow_rule, GFP_KERNEL); + if (err) + mlx5_del_flow_rules(flow_rule); + } + + kvfree(spec); + return err; +} + +static int esw_master_egress_create_resources(struct mlx5_eswitch *esw, + struct mlx5_flow_namespace *egress_ns, + struct mlx5_vport *vport, size_t count) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = { + .max_fte = count, .prio = 0, .level = 0, + }; + struct mlx5_flow_table *acl; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + int err; + + if (vport->egress.acl) + return 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + if (vport->vport || mlx5_core_is_ecpf(esw->dev)) + ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; + + acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + goto out; + } + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, count); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + goto err_group; + } + + vport->egress.acl = acl; + vport->egress.offloads.bounce_grp = g; + vport->egress.type = VPORT_EGRESS_ACL_TYPE_SHARED_FDB; + xa_init_flags(&vport->egress.offloads.bounce_rules, XA_FLAGS_ALLOC); + + kvfree(flow_group_in); + + return 0; + +err_group: + mlx5_destroy_flow_table(acl); +out: + kvfree(flow_group_in); + return err; +} + +static void esw_master_egress_destroy_resources(struct mlx5_vport *vport) +{ + if (!xa_empty(&vport->egress.offloads.bounce_rules)) + return; + mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + vport->egress.offloads.bounce_grp = NULL; + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.acl = NULL; +} + +static int esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, size_t count) +{ + struct mlx5_eswitch *esw = master->priv.eswitch; + u16 slave_index = MLX5_CAP_GEN(slave, vhca_id); + struct mlx5_flow_namespace *egress_ns; + struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + egress_ns = mlx5_get_flow_vport_acl_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->index); + if (!egress_ns) + return -EINVAL; + + if (vport->egress.acl && vport->egress.type != VPORT_EGRESS_ACL_TYPE_SHARED_FDB) + return 0; + + err = esw_master_egress_create_resources(esw, egress_ns, vport, count); + if (err) + return err; + + if (xa_load(&vport->egress.offloads.bounce_rules, slave_index)) + return -EINVAL; + + err = __esw_set_master_egress_rule(master, slave, vport, vport->egress.acl); + if (err) + goto err_rule; + + return 0; + +err_rule: + esw_master_egress_destroy_resources(vport); + return err; +} + +static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev, + struct mlx5_core_dev *slave_dev) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(dev->priv.eswitch, + dev->priv.eswitch->manager_vport); + + esw_acl_egress_ofld_bounce_rule_destroy(vport, MLX5_CAP_GEN(slave_dev, vhca_id)); + + if (xa_empty(&vport->egress.offloads.bounce_rules)) { + esw_acl_egress_ofld_cleanup(vport); + xa_destroy(&vport->egress.offloads.bounce_rules); + } +} + +int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw, int max_slaves) +{ + int err; + + err = esw_set_slave_root_fdb(master_esw->dev, + slave_esw->dev); + if (err) + return err; + + err = esw_set_master_egress_rule(master_esw->dev, + slave_esw->dev, max_slaves); + if (err) + goto err_acl; + + return err; + +err_acl: + esw_set_slave_root_fdb(NULL, slave_esw->dev); + return err; +} + +void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + esw_set_slave_root_fdb(NULL, slave_esw->dev); + esw_unset_master_egress_rule(master_esw->dev, slave_esw->dev); +} + +#define ESW_OFFLOADS_DEVCOM_PAIR (0) +#define ESW_OFFLOADS_DEVCOM_UNPAIR (1) + +static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) +{ + const struct mlx5_eswitch_rep_ops *ops; + struct mlx5_eswitch_rep *rep; + unsigned long i; + u8 rep_type; + + mlx5_esw_for_each_rep(esw, i, rep) { + rep_type = NUM_REP_TYPES; + while (rep_type--) { + ops = esw->offloads.rep_ops[rep_type]; + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + ops->event) + ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, peer_esw); + } + } +} + +static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) +{ +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + mlx5e_tc_clean_fdb_peer_flows(esw); +#endif + mlx5_esw_offloads_rep_event_unpair(esw, peer_esw); + esw_del_fdb_peer_miss_rules(esw, peer_esw->dev); +} + +static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) +{ + const struct mlx5_eswitch_rep_ops *ops; + struct mlx5_eswitch_rep *rep; + unsigned long i; + u8 rep_type; + int err; + + err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + if (err) + return err; + + mlx5_esw_for_each_rep(esw, i, rep) { + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + ops = esw->offloads.rep_ops[rep_type]; + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + ops->event) { + err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw); + if (err) + goto err_out; + } + } + } + + return 0; + +err_out: + mlx5_esw_offloads_unpair(esw, peer_esw); + return err; +} + +static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + bool pair) +{ + u16 peer_vhca_id = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + u16 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + struct mlx5_flow_root_namespace *peer_ns; + struct mlx5_flow_root_namespace *ns; + int err; + + peer_ns = peer_esw->dev->priv.steering->fdb_root_ns; + ns = esw->dev->priv.steering->fdb_root_ns; + + if (pair) { + err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_vhca_id); + if (err) + return err; + + err = mlx5_flow_namespace_set_peer(peer_ns, ns, vhca_id); + if (err) { + mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id); + return err; + } + } else { + mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id); + mlx5_flow_namespace_set_peer(peer_ns, NULL, vhca_id); + } + + return 0; +} + +static int mlx5_esw_offloads_devcom_event(int event, + void *my_data, + void *event_data) +{ + struct mlx5_eswitch *esw = my_data; + struct mlx5_eswitch *peer_esw = event_data; + u16 esw_i, peer_esw_i; + bool esw_paired; + int err; + + peer_esw_i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + esw_i = MLX5_CAP_GEN(esw->dev, vhca_id); + esw_paired = !!xa_load(&esw->paired, peer_esw_i); + + switch (event) { + case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; + + if (esw_paired) + break; + + err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); + if (err) + goto err_out; + + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_peer; + + err = mlx5_esw_offloads_pair(peer_esw, esw); + if (err) + goto err_pair; + + err = xa_insert(&esw->paired, peer_esw_i, peer_esw, GFP_KERNEL); + if (err) + goto err_xa; + + err = xa_insert(&peer_esw->paired, esw_i, esw, GFP_KERNEL); + if (err) + goto err_peer_xa; + + esw->num_peers++; + peer_esw->num_peers++; + mlx5_devcom_comp_set_ready(esw->devcom, true); + break; + + case ESW_OFFLOADS_DEVCOM_UNPAIR: + if (!esw_paired) + break; + + peer_esw->num_peers--; + esw->num_peers--; + if (!esw->num_peers && !peer_esw->num_peers) + mlx5_devcom_comp_set_ready(esw->devcom, false); + xa_erase(&peer_esw->paired, esw_i); + xa_erase(&esw->paired, peer_esw_i); + mlx5_esw_offloads_unpair(peer_esw, esw); + mlx5_esw_offloads_unpair(esw, peer_esw); + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); + break; + } + + return 0; + +err_peer_xa: + xa_erase(&esw->paired, peer_esw_i); +err_xa: + mlx5_esw_offloads_unpair(peer_esw, esw); +err_pair: + mlx5_esw_offloads_unpair(esw, peer_esw); +err_peer: + mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); +err_out: + mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", + event, err); + return err; +} + +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + INIT_LIST_HEAD(&esw->offloads.peer_flows[i]); + mutex_init(&esw->offloads.peer_mutex); + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + if ((MLX5_VPORT_MANAGER(esw->dev) || mlx5_core_is_ecpf_esw_manager(esw->dev)) && + !mlx5_lag_is_supported(esw->dev)) + return; + + xa_init(&esw->paired); + esw->num_peers = 0; + esw->devcom = mlx5_devcom_register_component(esw->dev->priv.devc, + MLX5_DEVCOM_ESW_OFFLOADS, + key, + mlx5_esw_offloads_devcom_event, + esw); + if (IS_ERR_OR_NULL(esw->devcom)) + return; + + mlx5_devcom_send_event(esw->devcom, + ESW_OFFLOADS_DEVCOM_PAIR, + ESW_OFFLOADS_DEVCOM_UNPAIR, + esw); +} + +void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) +{ + if (IS_ERR_OR_NULL(esw->devcom)) + return; + + mlx5_devcom_send_event(esw->devcom, + ESW_OFFLOADS_DEVCOM_UNPAIR, + ESW_OFFLOADS_DEVCOM_UNPAIR, + esw); + + mlx5_devcom_unregister_component(esw->devcom); + xa_destroy(&esw->paired); + esw->devcom = NULL; +} + +bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw) +{ + return mlx5_devcom_comp_is_ready(esw->devcom); +} + +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +{ + if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) + return false; + + if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_0)) + return false; + + return true; +} + +#define MLX5_ESW_METADATA_RSVD_UPLINK 1 + +/* Share the same metadata for uplink's. This is fine because: + * (a) In shared FDB mode (LAG) both uplink's are treated the + * same and tagged with the same metadata. + * (b) In non shared FDB mode, packets from physical port0 + * cannot hit eswitch of PF1 and vice versa. + */ +static u32 mlx5_esw_match_metadata_reserved(struct mlx5_eswitch *esw) +{ + return MLX5_ESW_METADATA_RSVD_UPLINK; +} + +u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) +{ + u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1; + /* Reserve 0xf for internal port offload */ + u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2; + u32 pf_num; + int id; + + /* Only 4 bits of pf_num */ + pf_num = mlx5_get_dev_index(esw->dev); + if (pf_num > max_pf_num) + return 0; + + /* Metadata is 4 bits of PFNUM and 12 bits of unique id */ + /* Use only non-zero vport_id (2-4095) for all PF's */ + id = ida_alloc_range(&esw->offloads.vport_metadata_ida, + MLX5_ESW_METADATA_RSVD_UPLINK + 1, + vport_end_ida, GFP_KERNEL); + if (id < 0) + return 0; + id = (pf_num << ESW_VPORT_BITS) | id; + return id; +} + +void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) +{ + u32 vport_bit_mask = (1 << ESW_VPORT_BITS) - 1; + + /* Metadata contains only 12 bits of actual ida id */ + ida_free(&esw->offloads.vport_metadata_ida, metadata & vport_bit_mask); +} + +static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->vport == MLX5_VPORT_UPLINK) + vport->default_metadata = mlx5_esw_match_metadata_reserved(esw); + else + vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); + + vport->metadata = vport->default_metadata; + return vport->metadata ? 0 : -ENOSPC; +} + +static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!vport->default_metadata) + return; + + if (vport->vport == MLX5_VPORT_UPLINK) + return; + + WARN_ON(vport->metadata != vport->default_metadata); + mlx5_esw_match_metadata_free(esw, vport->default_metadata); +} + +static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return; + + mlx5_esw_for_each_vport(esw, i, vport) + esw_offloads_vport_metadata_cleanup(esw, vport); +} + +static int esw_offloads_metadata_init(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + + mlx5_esw_for_each_vport(esw, i, vport) { + err = esw_offloads_vport_metadata_setup(esw, vport); + if (err) + goto metadata_err; + } + + return 0; + +metadata_err: + esw_offloads_metadata_uninit(esw); + return err; +} + +int +esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int err; + + err = esw_acl_ingress_ofld_setup(esw, vport); + if (err) + return err; + + err = esw_acl_egress_ofld_setup(esw, vport); + if (err) + goto egress_err; + + return 0; + +egress_err: + esw_acl_ingress_ofld_cleanup(esw, vport); + return err; +} + +void +esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_acl_egress_ofld_cleanup(vport); + esw_acl_ingress_ofld_cleanup(esw, vport); +} + +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *uplink, *manager; + int ret; + + uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(uplink)) + return PTR_ERR(uplink); + + ret = esw_vport_create_offloads_acl_tables(esw, uplink); + if (ret) + return ret; + + manager = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(manager)) { + ret = PTR_ERR(manager); + goto err_manager; + } + + ret = esw_vport_create_offloads_acl_tables(esw, manager); + if (ret) + goto err_manager; + + return 0; + +err_manager: + esw_vport_destroy_offloads_acl_tables(esw, uplink); + return ret; +} + +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (!IS_ERR(vport)) + esw_vport_destroy_offloads_acl_tables(esw, vport); + + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (!IS_ERR(vport)) + esw_vport_destroy_offloads_acl_tables(esw, vport); +} + +int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + int ret; + + if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) + return 0; + + ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK); + if (ret) + return ret; + + mlx5_esw_for_each_rep(esw, i, rep) { + if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED) + mlx5_esw_offloads_rep_load(esw, rep->vport); + } + + return 0; +} + +static int esw_offloads_steering_init(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_indir_table *indir; + int err; + + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); + mutex_init(&esw->fdb_table.offloads.vports.lock); + hash_init(esw->fdb_table.offloads.vports.table); + atomic64_set(&esw->user_count, 0); + + indir = mlx5_esw_indir_table_init(); + if (IS_ERR(indir)) { + err = PTR_ERR(indir); + goto create_indir_err; + } + esw->fdb_table.offloads.indir = indir; + + err = esw_create_offloads_acl_tables(esw); + if (err) + goto create_acl_err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_offloads_err; + + err = esw_create_restore_table(esw); + if (err) + goto create_restore_err; + + err = esw_create_offloads_fdb_tables(esw); + if (err) + goto create_fdb_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + err = esw_create_vport_rx_drop_group(esw); + if (err) + goto create_rx_drop_fg_err; + + err = esw_create_vport_rx_drop_rule(esw); + if (err) + goto create_rx_drop_rule_err; + + return 0; + +create_rx_drop_rule_err: + esw_destroy_vport_rx_drop_group(esw); +create_rx_drop_fg_err: + esw_destroy_vport_rx_group(esw); +create_fg_err: + esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_restore_table(esw); +create_restore_err: + esw_destroy_offloads_table(esw); +create_offloads_err: + esw_destroy_offloads_acl_tables(esw); +create_acl_err: + mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); +create_indir_err: + mutex_destroy(&esw->fdb_table.offloads.vports.lock); + return err; +} + +static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) +{ + esw_destroy_vport_rx_drop_rule(esw); + esw_destroy_vport_rx_drop_group(esw); + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_fdb_tables(esw); + esw_destroy_restore_table(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_acl_tables(esw); + mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); + mutex_destroy(&esw->fdb_table.offloads.vports.lock); +} + +static void +esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) +{ + struct devlink *devlink; + bool host_pf_disabled; + u16 new_num_vfs; + + new_num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + host_pf_disabled = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_pf_disabled); + + if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) + return; + + devlink = priv_to_devlink(esw->dev); + devl_lock(devlink); + /* Number of VFs can only change from "0 to x" or "x to 0". */ + if (esw->esw_funcs.num_vfs > 0) { + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + } else { + int err; + + err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs, + MLX5_VPORT_UC_ADDR_CHANGE); + if (err) { + devl_unlock(devlink); + return; + } + } + esw->esw_funcs.num_vfs = new_num_vfs; + devl_unlock(devlink); +} + +static void esw_functions_changed_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + const u32 *out; + + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + goto out; + + esw_vfs_changed_event_handler(esw, out); + kvfree(out); +out: + kfree(host_work); +} + +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_esw_functions *esw_funcs; + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + + host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); + if (!host_work) + return NOTIFY_DONE; + + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); + + host_work->esw = esw; + + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); + queue_work(esw->work_queue, &host_work->work); + + return NOTIFY_OK; +} + +static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw) +{ + const u32 *query_host_out; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + + query_host_out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(query_host_out)) + return PTR_ERR(query_host_out); + + /* Mark non local controller with non zero controller number. */ + esw->offloads.host_number = MLX5_GET(query_esw_functions_out, query_host_out, + host_params_context.host_number); + kvfree(query_host_out); + return 0; +} + +bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller) +{ + /* Local controller is always valid */ + if (controller == 0) + return true; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return false; + + /* External host number starts with zero in device */ + return (controller == esw->offloads.host_number + 1); +} + +int esw_offloads_enable(struct mlx5_eswitch *esw) +{ + struct mapping_ctx *reg_c0_obj_pool; + struct mlx5_vport *vport; + unsigned long i; + u64 mapping_id; + int err; + + mutex_init(&esw->offloads.termtbl_mutex); + mlx5_rdma_enable_roce(esw->dev); + + err = mlx5_esw_host_number_init(esw); + if (err) + goto err_metadata; + + err = esw_offloads_metadata_init(esw); + if (err) + goto err_metadata; + + err = esw_set_passing_vport_metadata(esw, true); + if (err) + goto err_vport_metadata; + + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + ESW_REG_C0_USER_DATA_METADATA_MASK, + true); + + if (IS_ERR(reg_c0_obj_pool)) { + err = PTR_ERR(reg_c0_obj_pool); + goto err_pool; + } + esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool; + + err = esw_offloads_steering_init(esw); + if (err) + goto err_steering_init; + + /* Representor will control the vport link state */ + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + + /* Uplink vport rep must load first. */ + err = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK); + if (err) + goto err_uplink; + + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + if (err) + goto err_vports; + + return 0; + +err_vports: + mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK); +err_uplink: + esw_offloads_steering_cleanup(esw); +err_steering_init: + mapping_destroy(reg_c0_obj_pool); +err_pool: + esw_set_passing_vport_metadata(esw, false); +err_vport_metadata: + esw_offloads_metadata_uninit(esw); +err_metadata: + mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); + return err; +} + +static int esw_offloads_stop(struct mlx5_eswitch *esw, + struct netlink_ext_ack *extack) +{ + int err; + + esw->mode = MLX5_ESWITCH_LEGACY; + + /* If changing from switchdev to legacy mode without sriov enabled, + * no need to create legacy fdb. + */ + if (!mlx5_core_is_pf(esw->dev) || !mlx5_sriov_is_enabled(esw->dev)) + return 0; + + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); + if (err) + NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); + + return err; +} + +void esw_offloads_disable(struct mlx5_eswitch *esw) +{ + mlx5_eswitch_disable_pf_vf_vports(esw); + mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK); + esw_set_passing_vport_metadata(esw, false); + esw_offloads_steering_cleanup(esw); + mapping_destroy(esw->offloads.reg_c0_obj_pool); + esw_offloads_metadata_uninit(esw); + mlx5_rdma_disable_roce(esw->dev); + mutex_destroy(&esw->offloads.termtbl_mutex); +} + +static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + *mlx5_mode = MLX5_ESWITCH_LEGACY; + break; + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + *mlx5_mode = MLX5_ESWITCH_OFFLOADS; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) +{ + switch (mlx5_mode) { + case MLX5_ESWITCH_LEGACY: + *mode = DEVLINK_ESWITCH_MODE_LEGACY; + break; + case MLX5_ESWITCH_OFFLOADS: + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_INLINE_MODE_NONE: + *mlx5_mode = MLX5_INLINE_MODE_NONE; + break; + case DEVLINK_ESWITCH_INLINE_MODE_LINK: + *mlx5_mode = MLX5_INLINE_MODE_L2; + break; + case DEVLINK_ESWITCH_INLINE_MODE_NETWORK: + *mlx5_mode = MLX5_INLINE_MODE_IP; + break; + case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT: + *mlx5_mode = MLX5_INLINE_MODE_TCP_UDP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) +{ + switch (mlx5_mode) { + case MLX5_INLINE_MODE_NONE: + *mode = DEVLINK_ESWITCH_INLINE_MODE_NONE; + break; + case MLX5_INLINE_MODE_L2: + *mode = DEVLINK_ESWITCH_INLINE_MODE_LINK; + break; + case MLX5_INLINE_MODE_IP: + *mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK; + break; + case MLX5_INLINE_MODE_TCP_UDP: + *mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct net *devl_net, *netdev_net; + bool ret = false; + + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + if (dev->mlx5e_res.uplink_netdev) { + netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); + devl_net = devlink_net(devlink); + ret = net_eq(devl_net, netdev_net); + } + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); + return ret; +} + +int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; + + if (!mlx5_esw_allowed(esw)) + return 0; + + /* Take TC into account */ + err = mlx5_esw_try_lock(esw); + if (err < 0) + return err; + + esw->offloads.num_block_mode++; + mlx5_esw_unlock(esw); + return 0; +} + +void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + return; + + down_write(&esw->mode_lock); + esw->offloads.num_block_mode--; + up_write(&esw->mode_lock); +} + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + u16 cur_mlx5_mode, mlx5_mode = 0; + struct mlx5_eswitch *esw; + int err = 0; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + if (esw_mode_from_devlink(mode, &mlx5_mode)) + return -EINVAL; + + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && + !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); + return -EPERM; + } + + mlx5_lag_disable_change(esw->dev); + err = mlx5_esw_try_lock(esw); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); + goto enable_lag; + } + cur_mlx5_mode = err; + err = 0; + + if (cur_mlx5_mode == mlx5_mode) + goto unlock; + + if (esw->offloads.num_block_mode) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change eswitch mode when IPsec SA and/or policies are configured"); + err = -EOPNOTSUPP; + goto unlock; + } + + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + + mlx5_eswitch_disable_locked(esw); + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { + if (mlx5_devlink_trap_get_num_active(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change mode while devlink traps are active"); + err = -EOPNOTSUPP; + goto skip; + } + err = esw_offloads_start(esw, extack); + } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { + err = esw_offloads_stop(esw, extack); + mlx5_rescan_drivers(esw->dev); + } else { + err = -EINVAL; + } + +skip: + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; +unlock: + mlx5_esw_unlock(esw); +enable_lag: + mlx5_lag_enable_change(esw->dev); + return err; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + return esw_mode_to_devlink(esw->mode, mode); +} + +static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_vport *vport; + u16 err_vport_num = 0; + unsigned long i; + int err = 0; + + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode); + if (err) { + err_vport_num = vport->vport; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); + goto revert_inline_mode; + } + } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode); + if (err) { + err_vport_num = vport->vport; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); + goto revert_ec_vf_inline_mode; + } + } + } + return 0; + +revert_ec_vf_inline_mode: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + if (vport->vport == err_vport_num) + break; + mlx5_modify_nic_vport_min_inline(dev, + vport->vport, + esw->offloads.inline_mode); + } +revert_inline_mode: + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + if (vport->vport == err_vport_num) + break; + mlx5_modify_nic_vport_min_inline(dev, + vport->vport, + esw->offloads.inline_mode); + } + return err; +} + +int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw; + u8 mlx5_mode; + int err; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + down_write(&esw->mode_lock); + + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) { + err = 0; + goto out; + } + + fallthrough; + case MLX5_CAP_INLINE_MODE_L2: + NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); + err = -EOPNOTSUPP; + goto out; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + break; + } + + if (atomic64_read(&esw->offloads.num_flows) > 0) { + NL_SET_ERR_MSG_MOD(extack, + "Can't set inline mode when flows are configured"); + err = -EOPNOTSUPP; + goto out; + } + + err = esw_inline_mode_from_devlink(mode, &mlx5_mode); + if (err) + goto out; + + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + + err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack); + if (!err) + esw->offloads.inline_mode = mlx5_mode; + + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; + up_write(&esw->mode_lock); + return 0; + +out: + up_write(&esw->mode_lock); + return err; +} + +int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) +{ + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); +} + +bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + return true; + + down_write(&esw->mode_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + up_write(&esw->mode_lock); + return false; + } + + esw->offloads.num_block_encap++; + up_write(&esw->mode_lock); + return true; +} + +void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!mlx5_esw_allowed(esw)) + return; + + down_write(&esw->mode_lock); + esw->offloads.num_block_encap--; + up_write(&esw->mode_lock); +} + +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw; + int err = 0; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + down_write(&esw->mode_lock); + + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) { + err = -EOPNOTSUPP; + goto unlock; + } + + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) { + err = -EOPNOTSUPP; + goto unlock; + } + + if (esw->mode == MLX5_ESWITCH_LEGACY) { + esw->offloads.encap = encap; + goto unlock; + } + + if (esw->offloads.encap == encap) + goto unlock; + + if (atomic64_read(&esw->offloads.num_flows) > 0) { + NL_SET_ERR_MSG_MOD(extack, + "Can't set encapsulation when flows are configured"); + err = -EOPNOTSUPP; + goto unlock; + } + + if (esw->offloads.num_block_encap) { + NL_SET_ERR_MSG_MOD(extack, + "Can't set encapsulation when IPsec SA and/or policies are configured"); + err = -EOPNOTSUPP; + goto unlock; + } + + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + + esw_destroy_offloads_fdb_tables(esw); + + esw->offloads.encap = encap; + + err = esw_create_offloads_fdb_tables(esw); + + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed re-creating fast FDB table"); + esw->offloads.encap = !encap; + (void)esw_create_offloads_fdb_tables(esw); + } + + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; + +unlock: + up_write(&esw->mode_lock); + return err; +} + +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) +{ + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + *encap = esw->offloads.encap; + return 0; +} + +static bool +mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num) +{ + /* Currently, only ECPF based device has representor for host PF. */ + if (vport_num == MLX5_VPORT_PF && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) + return false; + + if (vport_num == MLX5_VPORT_ECPF && + !mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + const struct mlx5_eswitch_rep_ops *ops, + u8 rep_type) +{ + struct mlx5_eswitch_rep_data *rep_data; + struct mlx5_eswitch_rep *rep; + unsigned long i; + + esw->offloads.rep_ops[rep_type] = ops; + mlx5_esw_for_each_rep(esw, i, rep) { + if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) { + rep->esw = esw; + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); + } + } +} +EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); + +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + __unload_reps_all_vport(esw, rep_type); + + mlx5_esw_for_each_rep(esw, i, rep) + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); +} +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); + +void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + return rep->rep_data[rep_type].priv; +} + +void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, + u16 vport, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + + rep = mlx5_eswitch_get_rep(esw, vport); + + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); + return NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); + +void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) +{ + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); +} +EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); + +struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, + u16 vport) +{ + return mlx5_eswitch_get_rep(esw, vport); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_rep); + +bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED); +} +EXPORT_SYMBOL(mlx5_eswitch_reg_c1_loopback_enabled); + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, + u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) + return 0; + + return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS); +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); + +static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + *vhca_id = 0; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); + +out_free: + kfree(query_ctx); + return err; +} + +int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 *old_entry, *vhca_map_entry, vhca_id; + int err; + + err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + if (err) { + esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", + vport_num, err); + return err; + } + + vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL); + if (!vhca_map_entry) + return -ENOMEM; + + *vhca_map_entry = vport_num; + old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL); + if (xa_is_err(old_entry)) { + kfree(vhca_map_entry); + return xa_err(old_entry); + } + kfree(old_entry); + return 0; +} + +void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 *vhca_map_entry, vhca_id; + int err; + + err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + if (err) + esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", + vport_num, err); + + vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id); + kfree(vhca_map_entry); +} + +int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num) +{ + u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id); + + if (!res) + return -ENOENT; + + *vport_num = *res; + return 0; +} + +u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, + u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) + return 0; + + return vport->metadata; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set); + +int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + + mutex_lock(&esw->state_lock); + ether_addr_copy(hw_addr, vport->info.mac); + *hw_addr_len = ETH_ALEN; + mutex_unlock(&esw->state_lock); + return 0; +} + +int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + + return mlx5_eswitch_set_vport_mac(esw, vport->vport, hw_addr); +} + +int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + + if (!MLX5_CAP_GEN(esw->dev, migration)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration"); + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management"); + return -EOPNOTSUPP; + } + + mutex_lock(&esw->state_lock); + *is_enabled = vport->info.mig_enabled; + mutex_unlock(&esw->state_lock); + return 0; +} + +int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + if (!MLX5_CAP_GEN(esw->dev, migration)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration"); + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management"); + return -EOPNOTSUPP; + } + + mutex_lock(&esw->state_lock); + + if (vport->info.mig_enabled == enable) { + err = 0; + goto out; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) { + err = -ENOMEM; + goto out; + } + + err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, + MLX5_CAP_GENERAL_2); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); + goto out_free; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable); + + err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA migratable cap"); + goto out_free; + } + + vport->info.mig_enabled = enable; + +out_free: + kfree(query_ctx); +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management"); + return -EOPNOTSUPP; + } + + mutex_lock(&esw->state_lock); + *is_enabled = vport->info.roce_enabled; + mutex_unlock(&esw->state_lock); + return 0; +} + +int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + u16 vport_num = vport->vport; + void *query_ctx; + void *hca_caps; + int err; + + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support VHCA management"); + return -EOPNOTSUPP; + } + + mutex_lock(&esw->state_lock); + + if (vport->info.roce_enabled == enable) { + err = 0; + goto out; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) { + err = -ENOMEM; + goto out; + } + + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, + MLX5_CAP_GENERAL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); + goto out_free; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + MLX5_SET(cmd_hca_cap, hca_caps, roce, enable); + + err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA roce cap"); + goto out_free; + } + + vport->info.roce_enabled = enable; + +out_free: + kfree(query_ctx); +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int +mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *esw_attr, int attr_idx) +{ + struct mlx5_flow_destination new_dest = {}; + struct mlx5_flow_destination old_dest = {}; + + if (!esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, attr_idx)) + return 0; + + esw_setup_dest_fwd_ipsec(&old_dest, NULL, esw, esw_attr, attr_idx, 0, false); + esw_setup_dest_fwd_vport(&new_dest, NULL, esw, esw_attr, attr_idx, 0, false); + + return mlx5_modify_rule_destination(rule, &new_dest, &old_dest); +} + +#ifdef CONFIG_XFRM_OFFLOAD +int mlx5_devlink_port_fn_ipsec_crypto_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + int err = 0; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + if (!mlx5_esw_ipsec_vf_offload_supported(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPSec crypto"); + return -EOPNOTSUPP; + } + + vport = mlx5_devlink_port_vport_get(port); + + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + err = -EOPNOTSUPP; + goto unlock; + } + + *is_enabled = vport->info.ipsec_crypto_enabled; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_ipsec_crypto_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + u16 vport_num; + int err; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + err = mlx5_esw_ipsec_vf_crypto_offload_supported(esw->dev, vport_num); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support IPsec crypto"); + return err; + } + + vport = mlx5_devlink_port_vport_get(port); + + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); + goto unlock; + } + + if (vport->info.ipsec_crypto_enabled == enable) + goto unlock; + + if (!esw->enabled_ipsec_vf_count && esw->dev->num_ipsec_offloads) { + err = -EBUSY; + goto unlock; + } + + err = mlx5_esw_ipsec_vf_crypto_offload_set(esw, vport, enable); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set IPsec crypto"); + goto unlock; + } + + vport->info.ipsec_crypto_enabled = enable; + if (enable) + esw->enabled_ipsec_vf_count++; + else + esw->enabled_ipsec_vf_count--; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + int err = 0; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + if (!mlx5_esw_ipsec_vf_offload_supported(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet"); + return -EOPNOTSUPP; + } + + vport = mlx5_devlink_port_vport_get(port); + + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + err = -EOPNOTSUPP; + goto unlock; + } + + *is_enabled = vport->info.ipsec_packet_enabled; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, + bool enable, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + u16 vport_num; + int err; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + err = mlx5_esw_ipsec_vf_packet_offload_supported(esw->dev, vport_num); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support IPsec packet mode"); + return err; + } + + vport = mlx5_devlink_port_vport_get(port); + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); + goto unlock; + } + + if (vport->info.ipsec_packet_enabled == enable) + goto unlock; + + if (!esw->enabled_ipsec_vf_count && esw->dev->num_ipsec_offloads) { + err = -EBUSY; + goto unlock; + } + + err = mlx5_esw_ipsec_vf_packet_offload_set(esw, vport, enable); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to set IPsec packet mode"); + goto unlock; + } + + vport->info.ipsec_packet_enabled = enable; + if (enable) + esw->enabled_ipsec_vf_count++; + else + esw->enabled_ipsec_vf_count--; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} +#endif /* CONFIG_XFRM_OFFLOAD */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c new file mode 100644 index 0000000000..40bdc677f0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include +#include "eswitch.h" +#include "en_tc.h" +#include "fs_core.h" + +struct mlx5_termtbl_handle { + struct hlist_node termtbl_hlist; + + struct mlx5_flow_table *termtbl; + struct mlx5_flow_act flow_act; + struct mlx5_flow_destination dest; + + struct mlx5_flow_handle *rule; + int ref_count; +}; + +static u32 +mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + u32 hash; + + hash = jhash_1word(flow_act->action, 0); + hash = jhash((const void *)&flow_act->vlan, + sizeof(flow_act->vlan), hash); + hash = jhash((const void *)&dest->vport.num, + sizeof(dest->vport.num), hash); + hash = jhash((const void *)&dest->vport.vhca_id, + sizeof(dest->vport.num), hash); + if (flow_act->pkt_reformat) + hash = jhash(flow_act->pkt_reformat, + sizeof(*flow_act->pkt_reformat), + hash); + return hash; +} + +static int +mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, + struct mlx5_flow_destination *dest1, + struct mlx5_flow_act *flow_act2, + struct mlx5_flow_destination *dest2) +{ + int ret; + + ret = flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); + if (ret) + return ret; + + if (flow_act1->pkt_reformat && flow_act2->pkt_reformat) + return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat, + sizeof(*flow_act1->pkt_reformat)); + + return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat); +} + +static int +mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, + struct mlx5_termtbl_handle *tt, + struct mlx5_flow_act *flow_act) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + int err, err2; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* As this is the terminating action then the termination table is the + * same prio as the slow path + */ + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED | + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = 1; + ft_attr.level = 1; + ft_attr.autogroup.max_num_groups = 1; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(tt->termtbl)) { + err = PTR_ERR(tt->termtbl); + esw_warn(dev, "Failed to create termination table, err %pe\n", tt->termtbl); + return err; + } + + tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, + &tt->dest, 1); + if (IS_ERR(tt->rule)) { + err = PTR_ERR(tt->rule); + esw_warn(dev, "Failed to create termination table rule, err %pe\n", tt->rule); + goto add_flow_err; + } + return 0; + +add_flow_err: + err2 = mlx5_destroy_flow_table(tt->termtbl); + if (err2) + esw_warn(dev, "Failed to destroy termination table, err %d\n", err2); + + return err; +} + +static struct mlx5_termtbl_handle * +mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_termtbl_handle *tt; + bool found = false; + u32 hash_key; + int err; + + mutex_lock(&esw->offloads.termtbl_mutex); + hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); + hash_for_each_possible(esw->offloads.termtbl_tbl, tt, + termtbl_hlist, hash_key) { + if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest, + flow_act, dest)) { + found = true; + break; + } + } + if (found) + goto tt_add_ref; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) { + err = -ENOMEM; + goto tt_create_err; + } + + tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tt->dest.vport.num = dest->vport.num; + tt->dest.vport.vhca_id = dest->vport.vhca_id; + tt->dest.vport.flags = dest->vport.flags; + memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); + + err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); + if (err) + goto tt_create_err; + + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); +tt_add_ref: + tt->ref_count++; + mutex_unlock(&esw->offloads.termtbl_mutex); + return tt; +tt_create_err: + kfree(tt); + mutex_unlock(&esw->offloads.termtbl_mutex); + return ERR_PTR(err); +} + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt) +{ + mutex_lock(&esw->offloads.termtbl_mutex); + if (--tt->ref_count == 0) + hash_del(&tt->termtbl_hlist); + mutex_unlock(&esw->offloads.termtbl_mutex); + + if (!tt->ref_count) { + mlx5_del_flow_rules(tt->rule); + mlx5_destroy_flow_table(tt->termtbl); + kfree(tt); + } +} + +static void +mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, + struct mlx5_flow_act *dst) +{ + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); + } + } +} + +static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, + const struct mlx5_flow_spec *spec) +{ + u16 port_mask, port_value; + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return spec->flow_context.flow_source == + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + return (port_mask & port_value) == MLX5_VPORT_UPLINK; +} + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int i; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) || + mlx5e_tc_attr_flags_skip(attr->flags) || + (!mlx5_eswitch_offload_is_uplink_port(esw, spec) && !esw_attr->int_port)) + return false; + + /* push vlan on RX */ + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + !(mlx5_fs_get_capabilities(esw->dev, MLX5_FLOW_NAMESPACE_FDB) & + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX)) + return true; + + /* hairpin */ + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (!esw_attr->dest_int_port && esw_attr->dests[i].vport_valid && + esw_attr->dests[i].vport == MLX5_VPORT_UPLINK) + return true; + + return false; +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_act term_tbl_act = {}; + struct mlx5_flow_handle *rule = NULL; + bool term_table_created = false; + int num_vport_dests = 0; + int i, curr_dest; + + mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + for (i = 0; i < num_dest; i++) { + struct mlx5_termtbl_handle *tt; + + /* only vport destinations can be terminated */ + if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) { + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat; + } else { + term_tbl_act.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + term_tbl_act.pkt_reformat = NULL; + } + + /* get the terminating table for the action list */ + tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, + &dest[i], attr); + if (IS_ERR(tt)) { + esw_warn(esw->dev, "Failed to get termination table, err %pe\n", tt); + goto revert_changes; + } + attr->dests[num_vport_dests].termtbl = tt; + num_vport_dests++; + + /* link the destination with the termination table */ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = tt->termtbl; + term_table_created = true; + } + + /* at least one destination should reference a termination table */ + if (!term_table_created) + goto revert_changes; + + /* create the FTE */ + flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = NULL; + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); + if (IS_ERR(rule)) + goto revert_changes; + + goto out; + +revert_changes: + /* revert the changes that were made to the original flow_act + * and fall-back to the original rule actions + */ + mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act); + + for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { + struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + + attr->dests[curr_dest].termtbl = NULL; + + /* search for the destination associated with the + * current term table + */ + for (i = 0; i < num_dest; i++) { + if (dest[i].ft != tt->termtbl) + continue; + + memset(&dest[i], 0, sizeof(dest[i])); + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = tt->dest.vport.num; + dest[i].vport.vhca_id = tt->dest.vport.vhca_id; + mlx5_eswitch_termtbl_put(esw, tt); + break; + } + } + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); +out: + return rule; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c new file mode 100644 index 0000000000..3ec892d51f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -0,0 +1,448 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/events.h" + +struct mlx5_event_nb { + struct mlx5_nb nb; + void *ctx; +}; + +/* General events handlers for the low level mlx5_core driver + * + * Other Major feature specific events such as + * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with + * separate notifiers callbacks, specifically by those mlx5 components. + */ +static int any_notifier(struct notifier_block *, unsigned long, void *); +static int temp_warn(struct notifier_block *, unsigned long, void *); +static int port_module(struct notifier_block *, unsigned long, void *); +static int pcie_core(struct notifier_block *, unsigned long, void *); + +/* handler which forwards the event to events->fw_nh, driver notifiers */ +static int forward_event(struct notifier_block *, unsigned long, void *); + +static struct mlx5_nb events_nbs_ref[] = { + /* Events to be processed by mlx5_core */ + {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, + {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, + {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, + {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + + /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE }, + /* QP/WQ resource events to forward */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, + /* SRQ events */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, +}; + +struct mlx5_events { + struct mlx5_core_dev *dev; + struct workqueue_struct *wq; + struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; + /* driver notifier chain for fw events */ + struct atomic_notifier_head fw_nh; + /* port module events stats */ + struct mlx5_pme_stats pme_stats; + /*pcie_core*/ + struct work_struct pcie_core_work; + /* driver notifier chain for sw events */ + struct blocking_notifier_head sw_nh; +}; + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; + case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE: + return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + case MLX5_EVENT_TYPE_PAGE_FAULT: + return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_MONITOR_COUNTER: + return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; + case MLX5_EVENT_TYPE_DEVICE_TRACER: + return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + case MLX5_EVENT_TYPE_OBJECT_CHANGE: + return "MLX5_EVENT_TYPE_OBJECT_CHANGE"; + default: + return "Unrecognized event"; + } +} + +/* handles all FW events, type == eqe->type */ +static int any_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", + eqe_type_str(eqe->type), eqe->sub_type); + return NOTIFY_OK; +} + +/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ +static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); + + mlx5_core_warn(events->dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); + + return NOTIFY_OK; +} + +/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) +{ + switch (status) { + case MLX5_MODULE_STATUS_PLUGGED: + return "Cable plugged"; + case MLX5_MODULE_STATUS_UNPLUGGED: + return "Cable unplugged"; + case MLX5_MODULE_STATUS_ERROR: + return "Cable error"; + case MLX5_MODULE_STATUS_DISABLED: + return "Cable disabled"; + default: + return "Unknown status"; + } +} + +static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) +{ + switch (error) { + case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: + return "Power budget exceeded"; + case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: + return "Long Range for non MLNX cable"; + case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: + return "Bus stuck (I2C or data shorted)"; + case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: + return "No EEPROM/retry timeout"; + case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: + return "Enforce part number list"; + case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: + return "Unknown identifier"; + case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: + return "High Temperature"; + case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: + return "Bad or shorted cable/module"; + case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: + return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; + default: + return "Unknown error"; + } +} + +/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static int port_module(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + enum port_module_event_status_type module_status; + enum port_module_event_error_type error_type; + struct mlx5_eqe_port_module *module_event_eqe; + const char *status_str; + u8 module_num; + + module_event_eqe = &eqe->data.port_module; + module_status = module_event_eqe->module_status & + PORT_MODULE_EVENT_MODULE_STATUS_MASK; + error_type = module_event_eqe->error_type & + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + + if (module_status < MLX5_MODULE_STATUS_NUM) + events->pme_stats.status_counters[module_status]++; + + if (module_status == MLX5_MODULE_STATUS_ERROR) + if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) + events->pme_stats.error_counters[error_type]++; + + if (!printk_ratelimit()) + return NOTIFY_OK; + + module_num = module_event_eqe->module; + status_str = mlx5_pme_status_to_string(module_status); + if (module_status == MLX5_MODULE_STATUS_ERROR) { + const char *error_str = mlx5_pme_error_to_string(error_type); + + mlx5_core_err(events->dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, status_str, error_str); + } else { + mlx5_core_info(events->dev, + "Port module event: module %u, %s\n", + module_num, status_str); + } + + return NOTIFY_OK; +} + +enum { + MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0, + MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1, + MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2, +}; + +static void mlx5_pcie_event(struct work_struct *work) +{ + u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0}; + struct mlx5_events *events; + struct mlx5_core_dev *dev; + u8 power_status; + u16 pci_power; + + events = container_of(work, struct mlx5_events, pcie_core_work); + dev = events->dev; + + if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power)) + return; + + mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MPEIN, 0, 0); + power_status = MLX5_GET(mpein_reg, out, pwr_status); + pci_power = MLX5_GET(mpein_reg, out, pci_power); + + switch (power_status) { + case MLX5_PCI_POWER_COULD_NOT_BE_READ: + mlx5_core_info_rl(dev, + "PCIe slot power capability was not advertised.\n"); + break; + case MLX5_PCI_POWER_INSUFFICIENT_REPORTED: + mlx5_core_warn_rl(dev, + "Detected insufficient power on the PCIe slot (%uW).\n", + pci_power); + break; + case MLX5_PCI_POWER_SUFFICIENT_REPORTED: + mlx5_core_info_rl(dev, + "PCIe slot advertised sufficient power (%uW).\n", + pci_power); + break; + } +} + +static int pcie_core(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, + struct mlx5_event_nb, + nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT: + queue_work(events->wq, &events->pcie_core_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) +{ + *stats = dev->priv.events->pme_stats; +} + +/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ +static int forward_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", + eqe_type_str(eqe->type), eqe->sub_type); + atomic_notifier_call_chain(&events->fw_nh, event, data); + return NOTIFY_OK; +} + +int mlx5_events_init(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); + + if (!events) + return -ENOMEM; + + ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh); + events->dev = dev; + dev->priv.events = events; + events->wq = create_singlethread_workqueue("mlx5_events"); + if (!events->wq) { + kfree(events); + return -ENOMEM; + } + INIT_WORK(&events->pcie_core_work, mlx5_pcie_event); + BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh); + + return 0; +} + +void mlx5_events_cleanup(struct mlx5_core_dev *dev) +{ + destroy_workqueue(dev->priv.events->wq); + kvfree(dev->priv.events); +} + +void mlx5_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { + events->notifiers[i].nb = events_nbs_ref[i]; + events->notifiers[i].ctx = events; + mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); + } +} + +void mlx5_events_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) + mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); + flush_workqueue(events->wq); +} + +/* This API is used only for processing and forwarding firmware + * events to mlx5 consumer. + */ +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_register(&events->fw_nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_register); + +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_unregister(&events->fw_nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_unregister); + +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) +{ + return atomic_notifier_call_chain(&events->fw_nh, event, data); +} + +/* This API is used only for processing and forwarding driver-specific + * events to mlx5 consumers. + */ +int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_chain_register(&events->sw_nh, nb); +} +EXPORT_SYMBOL(mlx5_blocking_notifier_register); + +int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_chain_unregister(&events->sw_nh, nb); +} +EXPORT_SYMBOL(mlx5_blocking_notifier_unregister); + +int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event, + void *data) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_call_chain(&events->sw_nh, event, data); +} + +void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work) +{ + queue_work(dev->priv.events->wq, work); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c new file mode 100644 index 0000000000..9a37077152 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "mlx5_core.h" +#include "fpga/cmd.h" + +#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \ + MLX5_FPGA_ACCESS_REG_SIZE_MAX) + +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write) +{ + u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0}; + u32 out[MLX5_FPGA_ACCESS_REG_SZ]; + int err; + + if (size & 3) + return -EINVAL; + if (addr & 3) + return -EINVAL; + if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX) + return -EINVAL; + + MLX5_SET(fpga_access_reg, in, size, size); + MLX5_SET64(fpga_access_reg, in, address, addr); + if (write) + memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_ACCESS_REG, 0, write); + if (err) + return err; + + if (!write) + memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size); + + return 0; +} + +int mlx5_fpga_caps(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0}; + + return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga, + MLX5_ST_SZ_BYTES(fpga_cap), + MLX5_REG_FPGA_CAP, 0, 0); +} + +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + + MLX5_SET(fpga_ctrl, in, operation, op); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, true); +} + +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size) +{ + unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len); + u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr); + unsigned int read; + int ret = 0; + + if (cap_size > size) { + mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u", + size, cap_size); + return -EINVAL; + } + + while (cap_size > 0) { + read = min_t(unsigned int, cap_size, + MLX5_FPGA_ACCESS_REG_SIZE_MAX); + + ret = mlx5_fpga_access_reg(dev, read, addr, caps, false); + if (ret) { + mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d", + read, addr, ret); + return ret; + } + + cap_size -= read; + addr += read; + caps += read; + } + + return ret; +} + +int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query) +{ + u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0}; + u32 out[MLX5_ST_SZ_DW(fpga_ctrl)]; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_FPGA_CTRL, 0, false); + if (err) + return err; + + query->status = MLX5_GET(fpga_ctrl, out, status); + query->admin_image = MLX5_GET(fpga_ctrl, out, flash_select_admin); + query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper); + return 0; +} + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn) +{ + u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)] = {}; + u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {}; + int ret; + + MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP); + memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc)); + + ret = mlx5_cmd_exec_inout(dev, fpga_create_qp, in, out); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc)); + *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn); + return ret; +} + +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, + void *fpga_qpc) +{ + u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {}; + + MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP); + MLX5_SET(fpga_modify_qp_in, in, field_select, fields); + MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn); + memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc, + MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc)); + + return mlx5_cmd_exec_in(dev, fpga_modify_qp, in); +} + +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, + u32 fpga_qpn, void *fpga_qpc) +{ + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)] = {}; + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {}; + int ret; + + MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP); + MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec_inout(dev, fpga_query_qp, in, out); + if (ret) + return ret; + + memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc), + MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc)); + return ret; +} + +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn) +{ + u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {}; + + MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP); + MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn); + + return mlx5_cmd_exec_in(dev, fpga_destroy_qp, in); +} + +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data) +{ + u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)] = {}; + u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {}; + int ret; + + MLX5_SET(fpga_query_qp_counters_in, in, opcode, + MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS); + MLX5_SET(fpga_query_qp_counters_in, in, clear, clear); + MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn); + + ret = mlx5_cmd_exec_inout(dev, fpga_query_qp_counters, in, out); + if (ret) + return ret; + + data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_ack_packets); + data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_send_packets); + data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_ack_packets); + data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out, + tx_send_packets); + data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out, + rx_total_drop); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h new file mode 100644 index 0000000000..11621d265d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_FPGA_H__ +#define __MLX5_FPGA_H__ + +#include + +enum mlx5_fpga_id { + MLX5_FPGA_NEWTON = 0, + MLX5_FPGA_EDISON = 1, + MLX5_FPGA_MORSE = 2, + MLX5_FPGA_MORSEQ = 3, +}; + +enum mlx5_fpga_image { + MLX5_FPGA_IMAGE_USER = 0, + MLX5_FPGA_IMAGE_FACTORY, +}; + +enum mlx5_fpga_status { + MLX5_FPGA_STATUS_SUCCESS = 0, + MLX5_FPGA_STATUS_FAILURE = 1, + MLX5_FPGA_STATUS_IN_PROGRESS = 2, + MLX5_FPGA_STATUS_NONE = 0xFFFF, +}; + +struct mlx5_fpga_query { + enum mlx5_fpga_image admin_image; + enum mlx5_fpga_image oper_image; + enum mlx5_fpga_status status; +}; + +enum mlx5_fpga_qpc_field_select { + MLX5_FPGA_QPC_STATE = BIT(0), +}; + +struct mlx5_fpga_qp_counters { + u64 rx_ack_packets; + u64 rx_send_packets; + u64 tx_ack_packets; + u64 tx_send_packets; + u64 rx_total_drop; +}; + +int mlx5_fpga_caps(struct mlx5_core_dev *dev); +int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query); +int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op); +int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, + void *buf, bool write); +int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size); + +int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc, + u32 *fpga_qpn); +int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, + enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc); +int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc); +int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn, + bool clear, struct mlx5_fpga_qp_counters *data); +int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn); + +#endif /* __MLX5_FPGA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c new file mode 100644 index 0000000000..c4de6bf8d1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -0,0 +1,1001 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include +#include +#include + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "fpga/conn.h" + +#define MLX5_FPGA_PKEY 0xFFFF +#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */ +#define MLX5_FPGA_RECV_SIZE 2048 +#define MLX5_FPGA_PORT_NUM 1 +#define MLX5_FPGA_CQ_BUDGET 64 + +static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + int err = 0; + + if (unlikely(!buf->sg[0].data)) + goto out; + + dma_device = mlx5_core_dma_dev(conn->fdev->mdev); + buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data, + buf->sg[0].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[0].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err); + err = -ENOMEM; + goto out; + } + + if (!buf->sg[1].data) + goto out; + + buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data, + buf->sg[1].size, buf->dma_dir); + err = dma_mapping_error(dma_device, buf->sg[1].dma_addr); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err); + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); + err = -ENOMEM; + } + +out: + return err; +} + +static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct device *dma_device; + + dma_device = mlx5_core_dma_dev(conn->fdev->mdev); + if (buf->sg[1].data) + dma_unmap_single(dma_device, buf->sg[1].dma_addr, + buf->sg[1].size, buf->dma_dir); + + if (likely(buf->sg[0].data)) + dma_unmap_single(dma_device, buf->sg[0].dma_addr, + buf->sg[0].size, buf->dma_dir); +} + +static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_data_seg *data; + unsigned int ix; + int err = 0; + + err = mlx5_fpga_conn_map_buf(conn, buf); + if (unlikely(err)) + goto out; + + if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) { + mlx5_fpga_conn_unmap_buf(conn, buf); + return -EBUSY; + } + + ix = conn->qp.rq.pc & (conn->qp.rq.size - 1); + data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix); + data->byte_count = cpu_to_be32(buf->sg[0].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey); + data->addr = cpu_to_be64(buf->sg[0].dma_addr); + + conn->qp.rq.pc++; + conn->qp.rq.bufs[ix] = buf; + + /* Make sure that descriptors are written before doorbell record. */ + dma_wmb(); + *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff); +out: + return err; +} + +static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe) +{ + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc); + /* Make sure that doorbell record is visible before ringing */ + wmb(); + mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET); +} + +static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_data_seg *data; + unsigned int ix, sgi; + int size = 1; + + ix = conn->qp.sq.pc & (conn->qp.sq.size - 1); + + ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix); + data = (void *)(ctrl + 1); + + for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) { + if (!buf->sg[sgi].data) + break; + data->byte_count = cpu_to_be32(buf->sg[sgi].size); + data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey); + data->addr = cpu_to_be64(buf->sg[sgi].dma_addr); + data++; + size++; + } + + ctrl->imm = 0; + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) | + MLX5_OPCODE_SEND); + ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.qpn << 8)); + + conn->qp.sq.pc++; + conn->qp.sq.bufs[ix] = buf; + mlx5_fpga_conn_notify_hw(conn, ctrl); +} + +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + unsigned long flags; + int err; + + if (!conn->qp.active) + return -ENOTCONN; + + buf->dma_dir = DMA_TO_DEVICE; + err = mlx5_fpga_conn_map_buf(conn, buf); + if (err) + return err; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) { + list_add_tail(&buf->list, &conn->qp.sq.backlog); + goto out_unlock; + } + + mlx5_fpga_conn_post_send(conn, buf); + +out_unlock: + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + return err; +} + +static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf; + int err; + + buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0); + if (!buf) + return -ENOMEM; + + buf->sg[0].data = (void *)(buf + 1); + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + buf->dma_dir = DMA_FROM_DEVICE; + + err = mlx5_fpga_conn_post_recv(conn, buf); + if (err) + kfree(buf); + + return err; +} + +static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + u32 *mkey) +{ + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); + + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf; + int ix, err; + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1); + buf = conn->qp.rq.bufs[ix]; + conn->qp.rq.bufs[ix] = NULL; + conn->qp.rq.cc++; + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (unlikely(status || !conn->qp.active)) { + conn->qp.active = false; + kfree(buf); + return; + } + + buf->sg[0].size = be32_to_cpu(cqe->byte_cnt); + mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n", + buf->sg[0].size); + conn->recv_cb(conn->cb_arg, buf); + + buf->sg[0].size = MLX5_FPGA_RECV_SIZE; + err = mlx5_fpga_conn_post_recv(conn, buf); + if (unlikely(err)) { + mlx5_fpga_warn(conn->fdev, + "Failed to re-post recv buf: %d\n", err); + kfree(buf); + } +} + +static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe, u8 status) +{ + struct mlx5_fpga_dma_buf *buf, *nextbuf; + unsigned long flags; + int ix; + + spin_lock_irqsave(&conn->qp.sq.lock, flags); + + ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1); + buf = conn->qp.sq.bufs[ix]; + conn->qp.sq.bufs[ix] = NULL; + conn->qp.sq.cc++; + + /* Handle backlog still under the spinlock to ensure message post order */ + if (unlikely(!list_empty(&conn->qp.sq.backlog))) { + if (likely(conn->qp.active)) { + nextbuf = list_first_entry(&conn->qp.sq.backlog, + struct mlx5_fpga_dma_buf, list); + list_del(&nextbuf->list); + mlx5_fpga_conn_post_send(conn, nextbuf); + } + } + + spin_unlock_irqrestore(&conn->qp.sq.lock, flags); + + if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR))) + mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + else + mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n", + buf, conn->fpga_qpn, status); + + mlx5_fpga_conn_unmap_buf(conn, buf); + + if (likely(buf->complete)) + buf->complete(conn, conn->fdev, buf, status); + + if (unlikely(status)) + conn->qp.active = false; +} + +static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, + struct mlx5_cqe64 *cqe) +{ + u8 opcode, status = 0; + + opcode = get_cqe_opcode(cqe); + + switch (opcode) { + case MLX5_CQE_REQ_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + fallthrough; + case MLX5_CQE_REQ: + mlx5_fpga_conn_sq_cqe(conn, cqe, status); + break; + + case MLX5_CQE_RESP_ERR: + status = ((struct mlx5_err_cqe *)cqe)->syndrome; + fallthrough; + case MLX5_CQE_RESP_SEND: + mlx5_fpga_conn_rq_cqe(conn, cqe, status); + break; + default: + mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n", + opcode); + } +} + +static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn) +{ + mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT, + conn->fdev->conn_res.uar->map, conn->cq.wq.cc); +} + +static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn, + unsigned int budget) +{ + struct mlx5_cqe64 *cqe; + + while (budget) { + cqe = mlx5_cqwq_get_cqe(&conn->cq.wq); + if (!cqe) + break; + + budget--; + mlx5_cqwq_pop(&conn->cq.wq); + mlx5_fpga_conn_handle_cqe(conn, cqe); + mlx5_cqwq_update_db_record(&conn->cq.wq); + } + if (!budget) { + tasklet_schedule(&conn->cq.tasklet); + return; + } + + mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc); + /* ensure cq space is freed before enabling more cqes */ + wmb(); + mlx5_fpga_conn_arm_cq(conn); +} + +static void mlx5_fpga_conn_cq_tasklet(struct tasklet_struct *t) +{ + struct mlx5_fpga_conn *conn = from_tasklet(conn, t, cq.tasklet); + + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq, + struct mlx5_eqe *eqe) +{ + struct mlx5_fpga_conn *conn; + + conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq); + if (unlikely(!conn->qp.active)) + return; + mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); +} + +static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + int inlen, err, eqn; + void *cqc, *in; + __be64 *pas; + u32 i; + + cq_size = roundup_pow_of_two(cq_size); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq, + &conn->cq.wq_ctrl); + if (err) + return err; + + for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) { + cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * conn->cq.wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_cqwq; + } + + err = mlx5_comp_eqn_get(mdev, smp_processor_id(), &eqn); + if (err) { + kvfree(in); + goto err_cqwq; + } + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas); + + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out)); + kvfree(in); + + if (err) + goto err_cqwq; + + conn->cq.mcq.cqe_sz = 64; + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; + *conn->cq.mcq.set_ci_db = 0; + *conn->cq.mcq.arm_db = 0; + conn->cq.mcq.vector = 0; + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; + conn->cq.mcq.uar = fdev->conn_res.uar; + tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); + + mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); + + goto out; + +err_cqwq: + mlx5_wq_destroy(&conn->cq.wq_ctrl); +out: + return err; +} + +static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn) +{ + tasklet_disable(&conn->cq.tasklet); + tasklet_kill(&conn->cq.tasklet); + mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq); + mlx5_wq_destroy(&conn->cq.wq_ctrl); +} + +static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + struct mlx5_wq_param wqp; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq, + &conn->qp.wq_ctrl); +} + +static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn, + unsigned int tx_size, unsigned int rx_size) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; + void *in = NULL, *qpc; + int err, inlen; + + conn->qp.rq.pc = 0; + conn->qp.rq.cc = 0; + conn->qp.rq.size = roundup_pow_of_two(rx_size); + conn->qp.sq.pc = 0; + conn->qp.sq.cc = 0; + conn->qp.sq.size = roundup_pow_of_two(tx_size); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size)); + err = mlx5_fpga_conn_create_wq(conn, temp_qpc); + if (err) + goto out; + + conn->qp.rq.bufs = kvcalloc(conn->qp.rq.size, + sizeof(conn->qp.rq.bufs[0]), + GFP_KERNEL); + if (!conn->qp.rq.bufs) { + err = -ENOMEM; + goto err_wq; + } + + conn->qp.sq.bufs = kvcalloc(conn->qp.sq.size, + sizeof(conn->qp.sq.bufs[0]), + GFP_KERNEL); + if (!conn->qp.sq.bufs) { + err = -ENOMEM; + goto err_rq_bufs; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + conn->qp.wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_sq_bufs; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, + conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size)); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev)); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + + mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas)); + + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto err_sq_bufs; + + conn->qp.qpn = MLX5_GET(create_qp_out, out, qpn); + mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.qpn); + + goto out; + +err_sq_bufs: + kvfree(conn->qp.sq.bufs); +err_rq_bufs: + kvfree(conn->qp.rq.bufs); +err_wq: + mlx5_wq_destroy(&conn->qp.wq_ctrl); +out: + kvfree(in); + return err; +} + +static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn) +{ + int ix; + + for (ix = 0; ix < conn->qp.rq.size; ix++) { + if (!conn->qp.rq.bufs[ix]) + continue; + mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]); + kfree(conn->qp.rq.bufs[ix]); + conn->qp.rq.bufs[ix] = NULL; + } +} + +static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_dma_buf *buf, *temp; + int ix; + + for (ix = 0; ix < conn->qp.sq.size; ix++) { + buf = conn->qp.sq.bufs[ix]; + if (!buf) + continue; + conn->qp.sq.bufs[ix] = NULL; + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } + list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) { + mlx5_fpga_conn_unmap_buf(conn, buf); + if (!buf->complete) + continue; + buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR); + } +} + +static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_core_dev *dev = conn->fdev->mdev; + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, conn->qp.qpn); + mlx5_cmd_exec_in(dev, destroy_qp, in); + + mlx5_fpga_conn_free_recv_bufs(conn); + mlx5_fpga_conn_flush_send_bufs(conn); + kvfree(conn->qp.sq.bufs); + kvfree(conn->qp.rq.bufs); + mlx5_wq_destroy(&conn->qp.wq_ctrl); +} + +static int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_core_dev *mdev = conn->fdev->mdev; + u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {}; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.qpn); + + MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP); + MLX5_SET(qp_2rst_in, in, qpn, conn->qp.qpn); + + return mlx5_cmd_exec_in(mdev, qp_2rst, in); +} + +static int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn) +{ + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc; + + mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.qpn); + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn); + MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); + + MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP); + MLX5_SET(rst2init_qp_in, in, qpn, conn->qp.qpn); + + return mlx5_cmd_exec_in(mdev, rst2init_qp, in); +} + +static int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn) +{ + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + struct mlx5_fpga_device *fdev = conn->fdev; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc; + + mlx5_fpga_dbg(conn->fdev, "QP RTR\n"); + + qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES); + MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg)); + MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn); + MLX5_SET(qpc, qpc, next_rcv_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn)); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); + ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32)); + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + conn->qp.sgid_index); + MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip), + MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip)); + + MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); + MLX5_SET(init2rtr_qp_in, in, qpn, conn->qp.qpn); + + return mlx5_cmd_exec_in(mdev, init2rtr_qp, in); +} + +static int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + struct mlx5_core_dev *mdev = fdev->mdev; + u32 *qpc; + + mlx5_fpga_dbg(conn->fdev, "QP RTS\n"); + + qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, log_ack_req_freq, 8); + MLX5_SET(qpc, qpc, min_rnr_nak, 0x12); + MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */ + MLX5_SET(qpc, qpc, next_send_psn, + MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn)); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */ + + MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP); + MLX5_SET(rtr2rts_qp_in, in, qpn, conn->qp.qpn); + MLX5_SET(rtr2rts_qp_in, in, opt_param_mask, MLX5_QP_OPTPAR_RNR_TIMEOUT); + + return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in); +} + +static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn) +{ + struct mlx5_fpga_device *fdev = conn->fdev; + int err; + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE); + err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc); + if (err) { + mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err); + goto out; + } + + err = mlx5_fpga_conn_reset_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state to reset\n"); + goto err_fpga_qp; + } + + err = mlx5_fpga_conn_init_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n"); + goto err_fpga_qp; + } + conn->qp.active = true; + + while (!mlx5_fpga_conn_post_recv_buf(conn)) + ; + + err = mlx5_fpga_conn_rtr_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n"); + goto err_recv_bufs; + } + + err = mlx5_fpga_conn_rts_qp(conn); + if (err) { + mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n"); + goto err_recv_bufs; + } + goto out; + +err_recv_bufs: + mlx5_fpga_conn_free_recv_bufs(conn); +err_fpga_qp: + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn, + MLX5_FPGA_QPC_STATE, &conn->fpga_qpc)) + mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n"); +out: + return err; +} + +struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type) +{ + struct mlx5_fpga_conn *ret, *conn; + u8 *remote_mac, *remote_ip; + int err; + + if (!attr->recv_cb) + return ERR_PTR(-EINVAL); + + conn = kzalloc(sizeof(*conn), GFP_KERNEL); + if (!conn) + return ERR_PTR(-ENOMEM); + + conn->fdev = fdev; + INIT_LIST_HEAD(&conn->qp.sq.backlog); + + spin_lock_init(&conn->qp.sq.lock); + + conn->recv_cb = attr->recv_cb; + conn->cb_arg = attr->cb_arg; + + remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); + err = mlx5_query_mac_address(fdev->mdev, remote_mac); + if (err) { + mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + /* Build Modified EUI-64 IPv6 address from the MAC address */ + remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip); + remote_ip[0] = 0xfe; + remote_ip[1] = 0x80; + addrconf_addr_eui48(&remote_ip[8], remote_mac); + + err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index); + if (err) { + mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err); + ret = ERR_PTR(err); + goto err; + } + + err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, + MLX5_ROCE_VERSION_2, + MLX5_ROCE_L3_TYPE_IPV6, + remote_ip, remote_mac, true, 0, + MLX5_FPGA_PORT_NUM); + if (err) { + mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err); + ret = ERR_PTR(err); + goto err_rsvd_gid; + } + mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index); + + /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe, + * created during processing of the cqe + */ + err = mlx5_fpga_conn_create_cq(conn, + (attr->tx_size + attr->rx_size) * 2); + if (err) { + mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err); + ret = ERR_PTR(err); + goto err_gid; + } + + mlx5_fpga_conn_arm_cq(conn); + + err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size); + if (err) { + mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err); + ret = ERR_PTR(err); + goto err_cq; + } + + MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT); + MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type); + MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC); + MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q); + MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1); + MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0); + MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY); + MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.qpn); + MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7); + MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7); + + err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc, + &conn->fpga_qpn); + if (err) { + mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err); + ret = ERR_PTR(err); + goto err_qp; + } + + err = mlx5_fpga_conn_connect(conn); + if (err) { + ret = ERR_PTR(err); + goto err_conn; + } + + mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn); + ret = conn; + goto out; + +err_conn: + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); +err_qp: + mlx5_fpga_conn_destroy_qp(conn); +err_cq: + mlx5_fpga_conn_destroy_cq(conn); +err_gid: + mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL, + NULL, false, 0, MLX5_FPGA_PORT_NUM); +err_rsvd_gid: + mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index); +err: + kfree(conn); +out: + return ret; +} + +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn) +{ + conn->qp.active = false; + tasklet_disable(&conn->cq.tasklet); + synchronize_irq(conn->cq.mcq.irqn); + + mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn); + mlx5_fpga_conn_destroy_qp(conn); + mlx5_fpga_conn_destroy_cq(conn); + + mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0, + NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM); + mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index); + kfree(conn); +} + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev) +{ + int err; + + err = mlx5_nic_vport_enable_roce(fdev->mdev); + if (err) { + mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err); + goto out; + } + + fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev); + if (IS_ERR(fdev->conn_res.uar)) { + err = PTR_ERR(fdev->conn_res.uar); + mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err); + goto err_roce; + } + mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n", + fdev->conn_res.uar->index); + + err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn); + if (err) { + mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err); + goto err_uar; + } + mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn); + + err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn, + &fdev->conn_res.mkey); + if (err) { + mlx5_fpga_err(fdev, "create mkey failed, %d\n", err); + goto err_dealloc_pd; + } + mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey); + + return 0; + +err_dealloc_pd: + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); +err_uar: + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); +err_roce: + mlx5_nic_vport_disable_roce(fdev->mdev); +out: + return err; +} + +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev) +{ + mlx5_core_destroy_mkey(fdev->mdev, fdev->conn_res.mkey); + mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn); + mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar); + mlx5_nic_vport_disable_roce(fdev->mdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h new file mode 100644 index 0000000000..5116e869a6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __MLX5_FPGA_CONN_H__ +#define __MLX5_FPGA_CONN_H__ + +#include +#include + +#include "fpga/core.h" +#include "fpga/sdk.h" +#include "wq.h" + +struct mlx5_fpga_conn { + struct mlx5_fpga_device *fdev; + + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + void *cb_arg; + + /* FPGA QP */ + u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)]; + u32 fpga_qpn; + + /* CQ */ + struct { + struct mlx5_cqwq wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct tasklet_struct tasklet; + } cq; + + /* QP */ + struct { + bool active; + int sgid_index; + struct mlx5_wq_qp wq; + struct mlx5_wq_ctrl wq_ctrl; + u32 qpn; + struct { + spinlock_t lock; /* Protects all SQ state */ + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + struct list_head backlog; + } sq; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + struct mlx5_fpga_dma_buf **bufs; + } rq; + } qp; +}; + +int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev); +void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev); +struct mlx5_fpga_conn * +mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr, + enum mlx5_ifc_fpga_qp_type qp_type); +void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn); +int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +#endif /* __MLX5_FPGA_CONN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c new file mode 100644 index 0000000000..e5c1012921 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "lib/eq.h" +#include "fpga/core.h" +#include "fpga/conn.h" + +static const char *const mlx5_fpga_error_strings[] = { + "Null Syndrome", + "Corrupted DDR", + "Flash Timeout", + "Internal Link Error", + "Watchdog HW Failure", + "I2C Failure", + "Image Changed", + "Temperature Critical", +}; + +static const char * const mlx5_fpga_qp_error_strings[] = { + "Null Syndrome", + "Retry Counter Expired", + "RNR Expired", +}; +static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void) +{ + struct mlx5_fpga_device *fdev; + + fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); + if (!fdev) + return NULL; + + spin_lock_init(&fdev->state_lock); + fdev->state = MLX5_FPGA_STATUS_NONE; + return fdev; +} + +static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image) +{ + switch (image) { + case MLX5_FPGA_IMAGE_USER: + return "user"; + case MLX5_FPGA_IMAGE_FACTORY: + return "factory"; + default: + return "unknown"; + } +} + +static const char *mlx5_fpga_name(u32 fpga_id) +{ + static char ret[32]; + + switch (fpga_id) { + case MLX5_FPGA_NEWTON: + return "Newton"; + case MLX5_FPGA_EDISON: + return "Edison"; + case MLX5_FPGA_MORSE: + return "Morse"; + case MLX5_FPGA_MORSEQ: + return "MorseQ"; + } + + snprintf(ret, sizeof(ret), "Unknown %d", fpga_id); + return ret; +} + +static int mlx5_is_fpga_lookaside(u32 fpga_id) +{ + return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON; +} + +static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev) +{ + struct mlx5_fpga_query query; + int err; + + err = mlx5_fpga_query(fdev->mdev, &query); + if (err) { + mlx5_fpga_err(fdev, "Failed to query status: %d\n", err); + return err; + } + + fdev->last_admin_image = query.admin_image; + fdev->last_oper_image = query.oper_image; + + mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n", + query.status, query.admin_image, query.oper_image); + + /* for FPGA lookaside projects FPGA load status is not important */ + if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) + return 0; + + if (query.status != MLX5_FPGA_STATUS_SUCCESS) { + mlx5_fpga_err(fdev, "%s image failed to load; status %u\n", + mlx5_fpga_image_name(fdev->last_oper_image), + query.status); + return -EIO; + } + + return 0; +} + +static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) +{ + int err; + struct mlx5_core_dev *mdev = fdev->mdev; + + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX); + if (err) { + mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err); + return err; + } + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF); + if (err) { + mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err); + return err; + } + return 0; +} + +static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *); + +static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + +static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + +int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int max_num_qps; + unsigned long flags; + u32 fpga_id; + int err; + + if (!fdev) + return 0; + + err = mlx5_fpga_caps(fdev->mdev); + if (err) + goto out; + + err = mlx5_fpga_device_load_check(fdev); + if (err) + goto out; + + fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id); + mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id); + + /* No QPs if FPGA does not participate in net processing */ + if (mlx5_is_fpga_lookaside(fpga_id)) + goto out; + + mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n", + mlx5_fpga_image_name(fdev->last_oper_image), + fdev->last_oper_image, + MLX5_CAP_FPGA(fdev->mdev, image_version), + MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id), + MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id), + MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version)); + + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + if (!max_num_qps) { + mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n"); + err = -ENOTSUPP; + goto out; + } + + err = mlx5_core_reserve_gids(mdev, max_num_qps); + if (err) + goto out; + + MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR); + MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb); + + err = mlx5_fpga_conn_device_init(fdev); + if (err) + goto err_rsvd_gid; + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_device_brb(fdev); + if (err) + goto err_conn_init; + } + + goto out; + +err_conn_init: + mlx5_fpga_conn_device_cleanup(fdev); + +err_rsvd_gid: + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); + mlx5_core_unreserve_gids(mdev, max_num_qps); +out: + spin_lock_irqsave(&fdev->state_lock, flags); + fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS; + spin_unlock_irqrestore(&fdev->state_lock, flags); + return err; +} + +int mlx5_fpga_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev; + + if (!MLX5_CAP_GEN(mdev, fpga)) { + mlx5_core_dbg(mdev, "FPGA capability not present\n"); + return 0; + } + + mlx5_core_dbg(mdev, "Initializing FPGA\n"); + + fdev = mlx5_fpga_device_alloc(); + if (!fdev) + return -ENOMEM; + + fdev->mdev = mdev; + mdev->fpga = fdev; + + return 0; +} + +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + unsigned int max_num_qps; + unsigned long flags; + int err; + + if (!fdev) + return; + + if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id))) + return; + + spin_lock_irqsave(&fdev->state_lock, flags); + if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) { + spin_unlock_irqrestore(&fdev->state_lock, flags); + return; + } + fdev->state = MLX5_FPGA_STATUS_NONE; + spin_unlock_irqrestore(&fdev->state_lock, flags); + + if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) { + err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON); + if (err) + mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n", + err); + } + + mlx5_fpga_conn_device_cleanup(fdev); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); + + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); + mlx5_core_unreserve_gids(mdev, max_num_qps); +} + +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) +{ + struct mlx5_fpga_device *fdev = mdev->fpga; + + mlx5_fpga_device_stop(mdev); + kfree(fdev); + mdev->fpga = NULL; +} + +static const char *mlx5_fpga_syndrome_to_string(u8 syndrome) +{ + if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings)) + return mlx5_fpga_error_strings[syndrome]; + return "Unknown"; +} + +static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) +{ + if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings)) + return mlx5_fpga_qp_error_strings[syndrome]; + return "Unknown"; +} + +static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, + unsigned long event, void *eqe) +{ + void *data = ((struct mlx5_eqe *)eqe)->data.raw; + const char *event_name; + bool teardown = false; + unsigned long flags; + u8 syndrome; + + switch (event) { + case MLX5_EVENT_TYPE_FPGA_ERROR: + syndrome = MLX5_GET(fpga_error_event, data, syndrome); + event_name = mlx5_fpga_syndrome_to_string(syndrome); + break; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome); + event_name = mlx5_fpga_qp_syndrome_to_string(syndrome); + break; + default: + return NOTIFY_DONE; + } + + spin_lock_irqsave(&fdev->state_lock, flags); + switch (fdev->state) { + case MLX5_FPGA_STATUS_SUCCESS: + mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name); + teardown = true; + break; + default: + mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n", + syndrome, event_name); + } + spin_unlock_irqrestore(&fdev->state_lock, flags); + /* We tear-down the card's interfaces and functionality because + * the FPGA bump-on-the-wire is misbehaving and we lose ability + * to communicate with the network. User may still be able to + * recover by re-programming or debugging the FPGA + */ + if (teardown) + mlx5_trigger_health_work(fdev->mdev); + + return NOTIFY_OK; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h new file mode 100644 index 0000000000..750c320501 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_FPGA_CORE_H__ +#define __MLX5_FPGA_CORE_H__ + +#ifdef CONFIG_MLX5_FPGA + +#include + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "fpga/cmd.h" + +/* Represents an Innova device */ +struct mlx5_fpga_device { + struct mlx5_core_dev *mdev; + struct mlx5_nb fpga_err_nb; + struct mlx5_nb fpga_qp_err_nb; + spinlock_t state_lock; /* Protects state transitions */ + enum mlx5_fpga_status state; + enum mlx5_fpga_image last_admin_image; + enum mlx5_fpga_image last_oper_image; + + /* QP Connection resources */ + struct { + u32 pdn; + u32 mkey; + struct mlx5_uars_page *uar; + } conn_res; +}; + +#define mlx5_fpga_dbg(__adev, format, ...) \ + mlx5_core_dbg((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_err(__adev, format, ...) \ + mlx5_core_err((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_warn(__adev, format, ...) \ + mlx5_core_warn((__adev)->mdev, "FPGA: %s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##__VA_ARGS__) + +#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \ + mlx5_core_err_rl((__adev)->mdev, "FPGA: %s:%d: " \ + format, __func__, __LINE__, ##__VA_ARGS__) + +#define mlx5_fpga_notice(__adev, format, ...) \ + mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__) + +#define mlx5_fpga_info(__adev, format, ...) \ + mlx5_core_info((__adev)->mdev, "FPGA: " format, ##__VA_ARGS__) + +int mlx5_fpga_init(struct mlx5_core_dev *mdev); +void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); +int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); +void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); + +#else + +static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev) +{ +} + +static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) +{ +} + +#endif + +#endif /* __MLX5_FPGA_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c new file mode 100644 index 0000000000..14962969c5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include + +#include "fpga/core.h" +#include "fpga/conn.h" +#include "fpga/sdk.h" + +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr) +{ + return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create); + +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn) +{ + mlx5_fpga_conn_destroy(conn); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy); + +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf) +{ + return mlx5_fpga_conn_send(conn, buf); +} +EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg); + +static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!size) + return -EINVAL; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, false); + if (err) { + mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n", + err); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size, + u64 addr, u8 *buf) +{ + size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX; + size_t bytes_done = 0; + u8 actual_size; + int err; + + if (!size) + return -EINVAL; + + if (!fdev->mdev) + return -ENOTCONN; + + while (bytes_done < size) { + actual_size = min(max_size, (size - bytes_done)); + + err = mlx5_fpga_access_reg(fdev->mdev, actual_size, + addr + bytes_done, + buf + bytes_done, true); + if (err) { + mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n"); + break; + } + + bytes_done += actual_size; + } + + return err; +} + +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_read); + +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type) +{ + int ret; + + switch (access_type) { + case MLX5_FPGA_ACCESS_TYPE_I2C: + ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf); + if (ret) + return ret; + break; + default: + mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n", + access_type); + return -EACCES; + } + + return size; +} +EXPORT_SYMBOL(mlx5_fpga_mem_write); + +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf) +{ + return mlx5_fpga_sbu_caps(fdev->mdev, buf, size); +} +EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h new file mode 100644 index 0000000000..89ef592656 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MLX5_FPGA_SDK_H +#define MLX5_FPGA_SDK_H + +#include +#include + +/** + * DOC: Innova SDK + * This header defines the in-kernel API for Innova FPGA client drivers. + */ +#define SBU_QP_QUEUE_SIZE 8 +#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000) + +/** + * enum mlx5_fpga_access_type - Enumerated the different methods possible for + * accessing the device memory address space + * + * @MLX5_FPGA_ACCESS_TYPE_I2C: Use the slow CX-FPGA I2C bus + * @MLX5_FPGA_ACCESS_TYPE_DONTCARE: Use the fastest available method + */ +enum mlx5_fpga_access_type { + MLX5_FPGA_ACCESS_TYPE_I2C = 0x0, + MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0, +}; + +struct mlx5_fpga_conn; +struct mlx5_fpga_device; + +/** + * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry + */ +struct mlx5_fpga_dma_entry { + /** @data: Virtual address pointer to the data */ + void *data; + /** @size: Size in bytes of the data */ + unsigned int size; + /** @dma_addr: Private member. Physical DMA-mapped address of the data */ + dma_addr_t dma_addr; +}; + +/** + * struct mlx5_fpga_dma_buf - A packet buffer + * May contain up to 2 scatter-gather data entries + */ +struct mlx5_fpga_dma_buf { + /** @dma_dir: DMA direction */ + enum dma_data_direction dma_dir; + /** @sg: Scatter-gather entries pointing to the data in memory */ + struct mlx5_fpga_dma_entry sg[2]; + /** @list: Item in SQ backlog, for TX packets */ + struct list_head list; + /** + * @complete: Completion routine, for TX packets + * @conn: FPGA Connection this packet was sent to + * @fdev: FPGA device this packet was sent to + * @buf: The packet buffer + * @status: 0 if successful, or an error code otherwise + */ + void (*complete)(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_device *fdev, + struct mlx5_fpga_dma_buf *buf, u8 status); +}; + +/** + * struct mlx5_fpga_conn_attr - FPGA connection attributes + * Describes the attributes of a connection + */ +struct mlx5_fpga_conn_attr { + /** @tx_size: Size of connection TX queue, in packets */ + unsigned int tx_size; + /** @rx_size: Size of connection RX queue, in packets */ + unsigned int rx_size; + /** + * @recv_cb: Callback function which is called for received packets + * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg + * @buf: A buffer containing a received packet + * + * buf is guaranteed to only contain a single scatter-gather entry. + * The size of the actual packet received is specified in buf.sg[0].size + * When this callback returns, the packet buffer may be re-used for + * subsequent receives. + */ + void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + /** @cb_arg: A context to be passed to recv_cb callback */ + void *cb_arg; +}; + +/** + * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection + * @fdev: The FPGA device + * @attr: Attributes of the new connection + * + * Sets up a new FPGA SBU connection with the specified attributes. + * The receive callback function may be called for incoming messages even + * before this function returns. + * + * The caller must eventually destroy the connection by calling + * mlx5_fpga_sbu_conn_destroy. + * + * Return: A new connection, or ERR_PTR() error value otherwise. + */ +struct mlx5_fpga_conn * +mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev, + struct mlx5_fpga_conn_attr *attr); + +/** + * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection + * @conn: The FPGA SBU connection to destroy + * + * Cleans up an FPGA SBU connection which was previously created with + * mlx5_fpga_sbu_conn_create. + */ +void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn); + +/** + * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet + * @conn: An FPGA SBU connection + * @buf: The packet buffer + * + * Queues a packet for transmission over an FPGA SBU connection. + * The buffer should not be modified or freed until completion. + * Upon completion, the buf's complete() callback is invoked, indicating the + * success or error status of the transmission. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn, + struct mlx5_fpga_dma_buf *buf); + +/** + * mlx5_fpga_mem_read() - Read from FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to read, in bytes + * @addr: Starting address to read from, in FPGA address space + * @buf: Buffer to read into + * @access_type: Method for reading + * + * Reads from the specified address into the specified buffer. + * The address may point to configuration space or to DDR. + * Large reads may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_mem_write() - Write to FPGA memory address space + * @fdev: The FPGA device + * @size: Size of chunk to write, in bytes + * @addr: Starting address to write to, in FPGA address space + * @buf: Buffer which contains data to write + * @access_type: Method for writing + * + * Writes the specified buffer data to FPGA memory at the specified address. + * The address may point to configuration space or to DDR. + * Large writes may be performed internally as several non-atomic operations. + * This function may sleep, so should not be called from atomic contexts. + * + * Return: 0 if successful, or an error value otherwise. + */ +int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr, + void *buf, enum mlx5_fpga_access_type access_type); + +/** + * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities + * @fdev: The FPGA device + * @size: Size of the buffer to read into + * @buf: Buffer to read the capabilities into + * + * Reads the FPGA SBU capabilities into the specified buffer. + * The format of the capabilities buffer is SBU-dependent. + * + * Return: 0 if successful + * -EINVAL if the buffer is not large enough to contain SBU caps + * or any other error value otherwise. + */ +int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf); + +#endif /* MLX5_FPGA_SDK_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c new file mode 100644 index 0000000000..b29299c49a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -0,0 +1,1148 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "fs_core.h" +#include "fs_cmd.h" +#include "fs_ft_pool.h" +#include "mlx5_core.h" +#include "eswitch.h" + +static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return 0; +} + +static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft) +{ + int max_fte = ft_attr->max_fte; + + ft->max_fte = max_fte ? roundup_pow_of_two(max_fte) : 1; + + return 0; +} + +static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + return 0; +} + +static int mlx5_cmd_stub_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + return 0; +} + +static int mlx5_cmd_stub_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + return 0; +} + +static int mlx5_cmd_stub_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + return 0; +} + +static int mlx5_cmd_stub_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + int modify_mask, + struct fs_fte *fte) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + return 0; +} + +static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + return 0; +} + +static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ +} + +static int mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + return 0; +} + +static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ +} + +static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id) +{ + return 0; +} + +static int mlx5_cmd_stub_create_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + +static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return 0; +} + +static u32 mlx5_cmd_stub_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + +static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, + bool ft_id_valid, + u32 ft_id) +{ + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, + FS_FT_FDB); + if (ft_id_valid) { + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + ft_id); + } else { + ns = mlx5_get_flow_namespace(slave, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } + + return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); +} + +static int +mlx5_cmd_stub_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + return 0; +} + +static int +mlx5_cmd_stub_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + return 0; +} + +static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + int err; + + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + underlay_qpn == 0) + return 0; + + if (ft->type == FS_FT_FDB && + mlx5_lag_is_shared_fdb(dev) && + !mlx5_lag_is_master(dev)) + return 0; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); + + if (disconnect) + MLX5_SET(set_flow_table_root_in, in, op_mod, 1); + else + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn); + MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); + MLX5_SET(set_flow_table_root_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + err = mlx5_cmd_exec_in(dev, set_flow_table_root, in); + if (!err && + ft->type == FS_FT_FDB && + mlx5_lag_is_shared_fdb(dev) && + mlx5_lag_is_master(dev)) { + struct mlx5_core_dev *peer_dev; + int i, j; + + mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { + err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect, + (!disconnect) ? ft->id : 0); + if (err && !disconnect) { + mlx5_lag_for_each_peer_mdev(dev, peer_dev, j) { + if (j < i) + mlx5_cmd_set_slave_root_fdb(dev, peer_dev, 1, + ns->root_ft->id); + else + break; + } + + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, + ns->root_ft->id); + mlx5_cmd_exec_in(dev, set_flow_table_root, in); + } + if (err) + break; + } + + } + + return err; +} + +static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft) +{ + int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + unsigned int size; + int err; + + size = mlx5_ft_pool_get_avail_sz(dev, ft->type, ft_attr->max_fte); + if (!size) + return -ENOSPC; + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid); + MLX5_SET(create_flow_table_in, in, table_type, ft->type); + MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level); + MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, size ? ilog2(size) : 0); + MLX5_SET(create_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(create_flow_table_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + en_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); + + switch (ft->op_mod) { + case FS_FT_OP_MOD_NORMAL: + if (next_ft) { + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_action, + MLX5_FLOW_TABLE_MISS_ACTION_FWD); + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_id, next_ft->id); + } else { + MLX5_SET(create_flow_table_in, in, + flow_table_context.table_miss_action, + ft->def_miss_action); + } + break; + + case FS_FT_OP_MOD_LAG_DEMUX: + MLX5_SET(create_flow_table_in, in, op_mod, 0x1); + if (next_ft) + MLX5_SET(create_flow_table_in, in, + flow_table_context.lag_master_next_table_id, + next_ft->id); + break; + } + + err = mlx5_cmd_exec_inout(dev, create_flow_table, in, out); + if (!err) { + ft->id = MLX5_GET(create_flow_table_out, out, + table_id); + ft->max_fte = size; + } else { + mlx5_ft_pool_put_sz(ns->dev, size); + } + + return err; +} + +static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + int err; + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_table_in, in, table_id, ft->id); + MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_table_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + err = mlx5_cmd_exec_in(dev, destroy_flow_table, in); + if (!err) + mlx5_ft_pool_put_sz(ns->dev, ft->max_fte); + + return err; +} + +static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(modify_flow_table_in, in, opcode, + MLX5_CMD_OP_MODIFY_FLOW_TABLE); + MLX5_SET(modify_flow_table_in, in, table_type, ft->type); + MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + + if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) { + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.lag_master_next_table_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.lag_master_next_table_id, 0); + } + } else { + MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(modify_flow_table_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_action, + MLX5_FLOW_TABLE_MISS_ACTION_FWD); + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_id, + next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, + flow_table_context.table_miss_action, + ft->def_miss_action); + } + } + + return mlx5_cmd_exec_in(dev, modify_flow_table, in); +} + +static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {}; + struct mlx5_core_dev *dev = ns->dev; + int err; + + MLX5_SET(create_flow_group_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, ft->type); + MLX5_SET(create_flow_group_in, in, table_id, ft->id); + MLX5_SET(create_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(create_flow_group_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + err = mlx5_cmd_exec_inout(dev, create_flow_group, in, out); + if (!err) + fg->id = MLX5_GET(create_flow_group_out, out, + group_id); + return err; +} + +static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(destroy_flow_group_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); + MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); + MLX5_SET(destroy_flow_group_in, in, group_id, fg->id); + MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_group_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + return mlx5_cmd_exec_in(dev, destroy_flow_group, in); +} + +static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, + struct fs_fte *fte, bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = + MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + struct mlx5_flow_rule *dst; + int num_encap = 0; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_NONE) + continue; + if ((dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} + +static void +mlx5_cmd_set_fte_flow_meter(struct fs_fte *fte, void *in_flow_context) +{ + void *exe_aso_ctrl; + void *execute_aso; + + execute_aso = MLX5_ADDR_OF(flow_context, in_flow_context, + execute_aso[0]); + MLX5_SET(execute_aso, execute_aso, valid, 1); + MLX5_SET(execute_aso, execute_aso, aso_object_id, + fte->action.exe_aso.object_id); + + exe_aso_ctrl = MLX5_ADDR_OF(execute_aso, execute_aso, exe_aso_ctrl); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, return_reg_id, + fte->action.exe_aso.return_reg_id); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, aso_type, + fte->action.exe_aso.type); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, init_color, + fte->action.exe_aso.flow_meter.init_color); + MLX5_SET(exe_aso_ctrl_flow_meter, exe_aso_ctrl, meter_id, + fte->action.exe_aso.flow_meter.meter_idx); +} + +static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5_flow_table *ft, + unsigned group_id, + struct fs_fte *fte) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; + bool extended_dest = false; + struct mlx5_flow_rule *dst; + void *in_flow_context, *vlan; + void *in_match_value; + int reformat_id = 0; + unsigned int inlen; + int dst_cnt_size; + u32 *in, action; + void *in_dests; + int err; + + if (mlx5_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + MLX5_SET(set_fte_in, in, ignore_flow_level, + !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL)); + + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, uplink_hairpin_en, + !!(fte->flow_context.flags & FLOW_CONTEXT_UPLINK_HAIRPIN_EN)); + + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + + action = fte->action.action; + if (extended_dest) + action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + + MLX5_SET(flow_context, in_flow_context, action, action); + + if (!extended_dest && fte->action.pkt_reformat) { + struct mlx5_pkt_reformat *pkt_reformat = fte->action.pkt_reformat; + + if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { + reformat_id = mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat); + if (reformat_id < 0) { + mlx5_core_err(dev, + "Unsupported SW-owned pkt_reformat type (%d) in FW-owned table\n", + pkt_reformat->reformat_type); + err = reformat_id; + goto err_out; + } + } else { + reformat_id = fte->action.pkt_reformat->id; + } + } + + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, (u32)reformat_id); + + if (fte->action.modify_hdr) { + if (fte->action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { + mlx5_core_err(dev, "Can't use SW-owned modify_hdr in FW-owned table\n"); + err = -EOPNOTSUPP; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); + } + + MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type, + fte->action.crypto.type); + MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_obj_id, + fte->action.crypto.obj_id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio); + + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, &fte->val, sizeof(fte->val)); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int list_size = 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + enum mlx5_flow_destination_type type = dst->dest_attr.type; + enum mlx5_ifc_flow_destination_type ifc_type; + unsigned int id; + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_NONE: + continue; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = dst->dest_attr.ft_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + id = dst->dest_attr.ft->id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + dst->dest_attr.vport.vhca_id); + if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) { + /* destination_id is reserved */ + id = 0; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK; + break; + } + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT; + id = dst->dest_attr.vport.num; + if (extended_dest && + dst->dest_attr.vport.pkt_reformat) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + dst->dest_attr.vport.pkt_reformat->id); + } + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + id = dst->dest_attr.sampler_id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + break; + case MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE: + MLX5_SET(dest_format_struct, in_dests, + destination_table_type, dst->dest_attr.ft->type); + id = dst->dest_attr.ft->id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE; + break; + default: + id = dst->dest_attr.tir_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; + } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + ifc_type); + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += dst_cnt_size; + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); + int list_size = 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + dst->dest_attr.counter_id); + in_dests += dst_cnt_size; + list_size++; + } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + if (fte->action.exe_aso.type == MLX5_EXE_ASO_FLOW_METER) { + mlx5_cmd_set_fte_flow_meter(fte, in_flow_context); + } else { + err = -EOPNOTSUPP; + goto err_out; + } + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: + kvfree(in); + return err; +} + +static int mlx5_cmd_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + struct mlx5_core_dev *dev = ns->dev; + unsigned int group_id = group->id; + + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); +} + +static int mlx5_cmd_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + int modify_mask, + struct fs_fte *fte) +{ + int opmod; + struct mlx5_core_dev *dev = ns->dev; + int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive. + flow_modify_en); + if (!atomic_mod_cap) + return -EOPNOTSUPP; + opmod = 1; + + return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, fg->id, fte); +} + +static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, ft->type); + MLX5_SET(delete_fte_in, in, table_id, ft->id); + MLX5_SET(delete_fte_in, in, flow_index, fte->index); + MLX5_SET(delete_fte_in, in, vport_number, ft->vport); + MLX5_SET(delete_fte_in, in, other_vport, + !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); + + return mlx5_cmd_exec_in(dev, delete_fte, in); +} + +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {}; + int err; + + MLX5_SET(alloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_ALLOC_FLOW_COUNTER); + MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask); + + err = mlx5_cmd_exec_inout(dev, alloc_flow_counter, in, out); + if (!err) + *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + return err; +} + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) +{ + return mlx5_cmd_fc_bulk_alloc(dev, 0, id); +} + +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {}; + + MLX5_SET(dealloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id); + return mlx5_cmd_exec_in(dev, dealloc_flow_counter, in); +} + +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, + u64 *packets, u64 *bytes) +{ + u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter)] = {}; + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {}; + void *stats; + int err = 0; + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, id); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); + return 0; +} + +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len) +{ + return MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len; +} + +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out) +{ + int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len); + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {}; + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {}; + struct mlx5_core_dev *dev = ns->dev; + void *packet_reformat_context_in; + int max_encap_size; + void *reformat; + int inlen; + int err; + u32 *in; + + if (namespace == MLX5_FLOW_NAMESPACE_FDB || + namespace == MLX5_FLOW_NAMESPACE_FDB_BYPASS) + max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); + else + max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size); + + if (params->size > max_encap_size) { + mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", + params->size, max_encap_size); + return -EINVAL; + } + + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + + params->size, GFP_KERNEL); + if (!in) + return -ENOMEM; + + packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in, + in, packet_reformat_context); + reformat = MLX5_ADDR_OF(packet_reformat_context_in, + packet_reformat_context_in, + reformat_data); + inlen = reformat - (void *)in + params->size; + + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_data_size, params->size); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_type, params->type); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_param_0, params->param_0); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_param_1, params->param_1); + if (params->data && params->size) + memcpy(reformat, params->data, params->size); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); + pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_FW; + + kfree(in); + return err; +} + +static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + pkt_reformat->id); + + mlx5_cmd_exec_in(dev, dealloc_packet_reformat_context, in); +} + +static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {}; + int max_actions, actions_size, inlen, err; + struct mlx5_core_dev *dev = ns->dev; + void *actions_in; + u8 table_type; + u32 *in; + + switch (namespace) { + case MLX5_FLOW_NAMESPACE_FDB: + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions); + table_type = FS_FT_FDB; + break; + case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC: + case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_BYPASS: + max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_RX; + break; + case MLX5_FLOW_NAMESPACE_EGRESS: + case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC: + case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC: + max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_TX; + break; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); + table_type = FS_FT_ESW_INGRESS_ACL; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC: + case MLX5_FLOW_NAMESPACE_RDMA_TX: + max_actions = MLX5_CAP_FLOWTABLE_RDMA_TX(dev, max_modify_header_actions); + table_type = FS_FT_RDMA_TX; + break; + default: + return -EOPNOTSUPP; + } + + if (num_actions > max_actions) { + mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n", + num_actions, max_actions); + return -EOPNOTSUPP; + } + + actions_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * num_actions; + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions); + + actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(actions_in, modify_actions, actions_size); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_FW; + kfree(in); + return err; +} + +static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_hdr->id); + + mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in); +} + +static int mlx5_cmd_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_OBJ_TYPE_MATCH_DEFINER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id); + + return mlx5_cmd_exec(ns->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + u32 out[MLX5_ST_SZ_DW(create_match_definer_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {}; + struct mlx5_core_dev *dev = ns->dev; + void *ptr; + int err; + + MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.obj_type, + MLX5_OBJ_TYPE_MATCH_DEFINER); + + ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context); + MLX5_SET(match_definer, ptr, format_id, format_id); + + ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask); + memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask)); + + err = mlx5_cmd_exec_inout(dev, create_match_definer, in, out); + return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); +} + +static u32 mlx5_cmd_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + return 0; +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds = { + .create_flow_table = mlx5_cmd_create_flow_table, + .destroy_flow_table = mlx5_cmd_destroy_flow_table, + .modify_flow_table = mlx5_cmd_modify_flow_table, + .create_flow_group = mlx5_cmd_create_flow_group, + .destroy_flow_group = mlx5_cmd_destroy_flow_group, + .create_fte = mlx5_cmd_create_fte, + .update_fte = mlx5_cmd_update_fte, + .delete_fte = mlx5_cmd_delete_fte, + .update_root_ft = mlx5_cmd_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_modify_header_dealloc, + .create_match_definer = mlx5_cmd_create_match_definer, + .destroy_match_definer = mlx5_cmd_destroy_match_definer, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_get_capabilities, +}; + +static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = { + .create_flow_table = mlx5_cmd_stub_create_flow_table, + .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table, + .modify_flow_table = mlx5_cmd_stub_modify_flow_table, + .create_flow_group = mlx5_cmd_stub_create_flow_group, + .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group, + .create_fte = mlx5_cmd_stub_create_fte, + .update_fte = mlx5_cmd_stub_update_fte, + .delete_fte = mlx5_cmd_stub_delete_fte, + .update_root_ft = mlx5_cmd_stub_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_stub_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc, + .create_match_definer = mlx5_cmd_stub_create_match_definer, + .destroy_match_definer = mlx5_cmd_stub_destroy_match_definer, + .set_peer = mlx5_cmd_stub_set_peer, + .create_ns = mlx5_cmd_stub_create_ns, + .destroy_ns = mlx5_cmd_stub_destroy_ns, + .get_capabilities = mlx5_cmd_stub_get_capabilities, +}; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void) +{ + return &mlx5_flow_cmds; +} + +static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void) +{ + return &mlx5_flow_cmd_stubs; +} + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type) +{ + switch (type) { + case FS_FT_NIC_RX: + case FS_FT_ESW_EGRESS_ACL: + case FS_FT_ESW_INGRESS_ACL: + case FS_FT_FDB: + case FS_FT_SNIFFER_RX: + case FS_FT_SNIFFER_TX: + case FS_FT_NIC_TX: + case FS_FT_RDMA_RX: + case FS_FT_RDMA_TX: + case FS_FT_PORT_SEL: + return mlx5_fs_cmd_get_fw_cmds(); + default: + return mlx5_fs_cmd_get_stub_cmds(); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h new file mode 100644 index 0000000000..7790ae5531 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CMD_ +#define _MLX5_FS_CMD_ + +#include "fs_core.h" + +struct mlx5_flow_cmds { + int (*create_flow_table)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft); + int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft); + + int (*modify_flow_table)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft); + + int (*create_flow_group)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg); + + int (*destroy_flow_group)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg); + + int (*create_fte)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + struct fs_fte *fte); + + int (*update_fte)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg, + int modify_mask, + struct fs_fte *fte); + + int (*delete_fte)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte); + + int (*update_root_ft)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect); + + int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat); + + void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat); + + int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr); + + void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr); + + int (*set_peer)(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id); + + int (*create_ns)(struct mlx5_flow_root_namespace *ns); + int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); + int (*create_match_definer)(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask); + int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns, + int definer_id); + + u32 (*get_capabilities)(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type); +}; + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask, + u32 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, + u64 *packets, u64 *bytes); + +int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len); +int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len, + u32 *out); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type); +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c new file mode 100644 index 0000000000..a13b9c2bd1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -0,0 +1,3780 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" +#include "fs_ft_pool.h" +#include "diag/fs_tracepoint.h" +#include "devlink.h" + +#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ + sizeof(struct init_tree_node)) + +#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\ + ...) {.type = FS_TYPE_PRIO,\ + .min_ft_level = min_level_val,\ + .num_levels = num_levels_val,\ + .num_leaf_prios = num_prios_val,\ + .caps = caps_val,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\ + ADD_PRIO(num_prios_val, 0, num_levels_val, {},\ + __VA_ARGS__)\ + +#define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE, \ + .def_miss_action = def_miss_act,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\ + sizeof(long)) + +#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap)) + +#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ + .caps = (long[]) {__VA_ARGS__} } + +#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_receive.modify_root), \ + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) + +#define FS_CHAINING_CAPS_EGRESS \ + FS_REQUIRED_CAPS( \ + FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_transmit.modify_root), \ + FS_CAP(flow_table_properties_nic_transmit \ + .identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_transmit.flow_table_modify)) + +#define FS_CHAINING_CAPS_RDMA_TX \ + FS_REQUIRED_CAPS( \ + FS_CAP(flow_table_properties_nic_transmit_rdma.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_transmit_rdma.modify_root), \ + FS_CAP(flow_table_properties_nic_transmit_rdma \ + .identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_transmit_rdma \ + .flow_table_modify)) + +#define LEFTOVERS_NUM_LEVELS 1 +#define LEFTOVERS_NUM_PRIOS 1 + +#define RDMA_RX_COUNTERS_PRIO_NUM_LEVELS 1 +#define RDMA_TX_COUNTERS_PRIO_NUM_LEVELS 1 + +#define BY_PASS_PRIO_NUM_LEVELS 1 +#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_NUM_PRIOS) + +#define KERNEL_RX_MACSEC_NUM_PRIOS 1 +#define KERNEL_RX_MACSEC_NUM_LEVELS 3 +#define KERNEL_RX_MACSEC_MIN_LEVEL (BY_PASS_MIN_LEVEL + KERNEL_RX_MACSEC_NUM_PRIOS) + +#define ETHTOOL_PRIO_NUM_LEVELS 1 +#define ETHTOOL_NUM_PRIOS 11 +#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) +/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, + * IPsec RoCE policy + */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 9 +#define KERNEL_NIC_NUM_PRIOS 1 +/* One more level for tc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) + +#define KERNEL_NIC_TC_NUM_PRIOS 1 +#define KERNEL_NIC_TC_NUM_LEVELS 3 + +#define ANCHOR_NUM_LEVELS 1 +#define ANCHOR_NUM_PRIOS 1 +#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 2 +#define OFFLOADS_NUM_PRIOS 2 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS) + +#define LAG_PRIO_NUM_LEVELS 1 +#define LAG_NUM_PRIOS 1 +#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1) + +#define KERNEL_TX_IPSEC_NUM_PRIOS 1 +#define KERNEL_TX_IPSEC_NUM_LEVELS 3 +#define KERNEL_TX_IPSEC_MIN_LEVEL (KERNEL_TX_IPSEC_NUM_LEVELS) + +#define KERNEL_TX_MACSEC_NUM_PRIOS 1 +#define KERNEL_TX_MACSEC_NUM_LEVELS 2 +#define KERNEL_TX_MACSEC_MIN_LEVEL (KERNEL_TX_IPSEC_MIN_LEVEL + KERNEL_TX_MACSEC_NUM_PRIOS) + +struct node_caps { + size_t arr_sz; + long *caps; +}; + +static struct init_tree_node { + enum fs_node_type type; + struct init_tree_node *children; + int ar_size; + struct node_caps caps; + int min_ft_level; + int num_leaf_prios; + int prio; + int num_levels; + enum mlx5_flow_table_miss_action def_miss_action; +} root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 8, + .children = (struct init_tree_node[]){ + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_RX_MACSEC_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_RX_MACSEC_NUM_PRIOS, + KERNEL_RX_MACSEC_NUM_LEVELS))), + ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, + LAG_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, + OFFLOADS_MAX_FT))), + ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, + ETHTOOL_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, + KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, + KERNEL_NIC_PRIO_NUM_LEVELS))), + ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, + LEFTOVERS_NUM_LEVELS))), + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, + ANCHOR_NUM_LEVELS))), + } +}; + +static struct init_tree_node egress_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 3, + .children = (struct init_tree_node[]) { + ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_TX_IPSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS, + KERNEL_TX_IPSEC_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_TX_MACSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(KERNEL_TX_MACSEC_NUM_PRIOS, + KERNEL_TX_MACSEC_NUM_LEVELS))), + } +}; + +enum { + RDMA_RX_IPSEC_PRIO, + RDMA_RX_MACSEC_PRIO, + RDMA_RX_COUNTERS_PRIO, + RDMA_RX_BYPASS_PRIO, + RDMA_RX_KERNEL_PRIO, +}; + +#define RDMA_RX_IPSEC_NUM_PRIOS 1 +#define RDMA_RX_IPSEC_NUM_LEVELS 2 +#define RDMA_RX_IPSEC_MIN_LEVEL (RDMA_RX_IPSEC_NUM_LEVELS) + +#define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS +#define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1) +#define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2) + +#define RDMA_RX_MACSEC_NUM_PRIOS 1 +#define RDMA_RX_MACSEC_PRIO_NUM_LEVELS 2 +#define RDMA_RX_MACSEC_MIN_LEVEL (RDMA_RX_COUNTERS_MIN_LEVEL + RDMA_RX_MACSEC_NUM_PRIOS) + +static struct init_tree_node rdma_rx_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 5, + .children = (struct init_tree_node[]) { + [RDMA_RX_IPSEC_PRIO] = + ADD_PRIO(0, RDMA_RX_IPSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_RX_IPSEC_NUM_PRIOS, + RDMA_RX_IPSEC_NUM_LEVELS))), + [RDMA_RX_MACSEC_PRIO] = + ADD_PRIO(0, RDMA_RX_MACSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_RX_MACSEC_NUM_PRIOS, + RDMA_RX_MACSEC_PRIO_NUM_LEVELS))), + [RDMA_RX_COUNTERS_PRIO] = + ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_RDMA_RX_NUM_COUNTERS_PRIOS, + RDMA_RX_COUNTERS_PRIO_NUM_LEVELS))), + [RDMA_RX_BYPASS_PRIO] = + ADD_PRIO(0, RDMA_RX_BYPASS_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + [RDMA_RX_KERNEL_PRIO] = + ADD_PRIO(0, RDMA_RX_KERNEL_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN, + ADD_MULTIPLE_PRIO(1, 1))), + } +}; + +enum { + RDMA_TX_COUNTERS_PRIO, + RDMA_TX_IPSEC_PRIO, + RDMA_TX_MACSEC_PRIO, + RDMA_TX_BYPASS_PRIO, +}; + +#define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS +#define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1) + +#define RDMA_TX_IPSEC_NUM_PRIOS 1 +#define RDMA_TX_IPSEC_PRIO_NUM_LEVELS 1 +#define RDMA_TX_IPSEC_MIN_LEVEL (RDMA_TX_COUNTERS_MIN_LEVEL + RDMA_TX_IPSEC_NUM_PRIOS) + +#define RDMA_TX_MACSEC_NUM_PRIOS 1 +#define RDMA_TX_MACESC_PRIO_NUM_LEVELS 1 +#define RDMA_TX_MACSEC_MIN_LEVEL (RDMA_TX_COUNTERS_MIN_LEVEL + RDMA_TX_MACSEC_NUM_PRIOS) + +static struct init_tree_node rdma_tx_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 4, + .children = (struct init_tree_node[]) { + [RDMA_TX_COUNTERS_PRIO] = + ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS, + RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))), + [RDMA_TX_IPSEC_PRIO] = + ADD_PRIO(0, RDMA_TX_IPSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_TX_IPSEC_NUM_PRIOS, + RDMA_TX_IPSEC_PRIO_NUM_LEVELS))), + [RDMA_TX_MACSEC_PRIO] = + ADD_PRIO(0, RDMA_TX_MACSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_TX_MACSEC_NUM_PRIOS, + RDMA_TX_MACESC_PRIO_NUM_LEVELS))), + [RDMA_TX_BYPASS_PRIO] = + ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0, + FS_CHAINING_CAPS_RDMA_TX, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_TX_BYPASS_MIN_LEVEL, + BY_PASS_PRIO_NUM_LEVELS))), + } +}; + +enum fs_i_lock_class { + FS_LOCK_GRANDPARENT, + FS_LOCK_PARENT, + FS_LOCK_CHILD +}; + +static const struct rhashtable_params rhash_fte = { + .key_len = sizeof_field(struct fs_fte, val), + .key_offset = offsetof(struct fs_fte, val), + .head_offset = offsetof(struct fs_fte, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +static const struct rhashtable_params rhash_fg = { + .key_len = sizeof_field(struct mlx5_flow_group, mask), + .key_offset = offsetof(struct mlx5_flow_group, mask), + .head_offset = offsetof(struct mlx5_flow_group, hash), + .automatic_shrinking = true, + .min_size = 1, + +}; + +static void del_hw_flow_table(struct fs_node *node); +static void del_hw_flow_group(struct fs_node *node); +static void del_hw_fte(struct fs_node *node); +static void del_sw_flow_table(struct fs_node *node); +static void del_sw_flow_group(struct fs_node *node); +static void del_sw_fte(struct fs_node *node); +static void del_sw_prio(struct fs_node *node); +static void del_sw_ns(struct fs_node *node); +/* Delete rule (destination) is special case that + * requires to lock the FTE for all the deletion process. + */ +static void del_sw_hw_rule(struct fs_node *node); +static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2); +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns); +static struct mlx5_flow_rule * +find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest); + +static void tree_init_node(struct fs_node *node, + void (*del_hw_func)(struct fs_node *), + void (*del_sw_func)(struct fs_node *)) +{ + refcount_set(&node->refcount, 1); + INIT_LIST_HEAD(&node->list); + INIT_LIST_HEAD(&node->children); + init_rwsem(&node->lock); + node->del_hw_func = del_hw_func; + node->del_sw_func = del_sw_func; + node->active = false; +} + +static void tree_add_node(struct fs_node *node, struct fs_node *parent) +{ + if (parent) + refcount_inc(&parent->refcount); + node->parent = parent; + + /* Parent is the root */ + if (!parent) + node->root = node; + else + node->root = parent->root; +} + +static int tree_get_node(struct fs_node *node) +{ + return refcount_inc_not_zero(&node->refcount); +} + +static void nested_down_read_ref_node(struct fs_node *node, + enum fs_i_lock_class class) +{ + if (node) { + down_read_nested(&node->lock, class); + refcount_inc(&node->refcount); + } +} + +static void nested_down_write_ref_node(struct fs_node *node, + enum fs_i_lock_class class) +{ + if (node) { + down_write_nested(&node->lock, class); + refcount_inc(&node->refcount); + } +} + +static void down_write_ref_node(struct fs_node *node, bool locked) +{ + if (node) { + if (!locked) + down_write(&node->lock); + refcount_inc(&node->refcount); + } +} + +static void up_read_ref_node(struct fs_node *node) +{ + refcount_dec(&node->refcount); + up_read(&node->lock); +} + +static void up_write_ref_node(struct fs_node *node, bool locked) +{ + refcount_dec(&node->refcount); + if (!locked) + up_write(&node->lock); +} + +static void tree_put_node(struct fs_node *node, bool locked) +{ + struct fs_node *parent_node = node->parent; + + if (refcount_dec_and_test(&node->refcount)) { + if (node->del_hw_func) + node->del_hw_func(node); + if (parent_node) { + down_write_ref_node(parent_node, locked); + list_del_init(&node->list); + } + node->del_sw_func(node); + if (parent_node) + up_write_ref_node(parent_node, locked); + node = NULL; + } + if (!node && parent_node) + tree_put_node(parent_node, locked); +} + +static int tree_remove_node(struct fs_node *node, bool locked) +{ + if (refcount_read(&node->refcount) > 1) { + refcount_dec(&node->refcount); + return -EEXIST; + } + tree_put_node(node, locked); + return 0; +} + +static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, + unsigned int prio) +{ + struct fs_prio *iter_prio; + + fs_for_each_prio(iter_prio, ns) { + if (iter_prio->prio == prio) + return iter_prio; + } + + return NULL; +} + +static bool is_fwd_next_action(u32 action) +{ + return action & (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS); +} + +static bool is_fwd_dest_type(enum mlx5_flow_destination_type type) +{ + return type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM || + type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE || + type == MLX5_FLOW_DESTINATION_TYPE_UPLINK || + type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER || + type == MLX5_FLOW_DESTINATION_TYPE_TIR || + type == MLX5_FLOW_DESTINATION_TYPE_RANGE || + type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; +} + +static bool check_valid_spec(const struct mlx5_flow_spec *spec) +{ + int i; + + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + if (spec->match_value[i] & ~spec->match_criteria[i]) { + pr_warn("mlx5_core: match_value differs from match_criteria\n"); + return false; + } + + return true; +} + +struct mlx5_flow_root_namespace *find_root(struct fs_node *node) +{ + struct fs_node *root; + struct mlx5_flow_namespace *ns; + + root = node->root; + + if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) { + pr_warn("mlx5: flow steering node is not in tree or garbaged\n"); + return NULL; + } + + ns = container_of(root, struct mlx5_flow_namespace, node); + return container_of(ns, struct mlx5_flow_root_namespace, ns); +} + +static inline struct mlx5_flow_steering *get_steering(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev->priv.steering; + return NULL; +} + +static inline struct mlx5_core_dev *get_dev(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev; + return NULL; +} + +static void del_sw_ns(struct fs_node *node) +{ + kfree(node); +} + +static void del_sw_prio(struct fs_node *node) +{ + kfree(node); +} + +static void del_hw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + int err; + + fs_get_obj(ft, node); + dev = get_dev(&ft->node); + root = find_root(&ft->node); + trace_mlx5_fs_del_ft(ft); + + if (node->active) { + err = root->cmds->destroy_flow_table(root, ft); + if (err) + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + } +} + +static void del_sw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct fs_prio *prio; + + fs_get_obj(ft, node); + + rhltable_destroy(&ft->fgs_hash); + if (ft->node.parent) { + fs_get_obj(prio, ft->node.parent); + prio->num_ft--; + } + kfree(ft); +} + +static void modify_fte(struct fs_fte *fte) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + int err; + + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + dev = get_dev(&fte->node); + + root = find_root(&ft->node); + err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte); + if (err) + mlx5_core_warn(dev, + "%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); + fte->modify_mask = 0; +} + +static void del_sw_hw_rule(struct fs_node *node) +{ + struct mlx5_flow_rule *rule; + struct fs_fte *fte; + + fs_get_obj(rule, node); + fs_get_obj(fte, rule->node.parent); + trace_mlx5_fs_del_rule(rule); + if (is_fwd_next_action(rule->sw_action)) { + mutex_lock(&rule->dest_attr.ft->lock); + list_del(&rule->next_ft); + mutex_unlock(&rule->dest_attr.ft->lock); + } + + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) { + --fte->dests_size; + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + goto out; + } + + if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + --fte->dests_size; + fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW; + goto out; + } + + if (is_fwd_dest_type(rule->dest_attr.type)) { + --fte->dests_size; + --fte->fwd_dests; + + if (!fte->fwd_dests) + fte->action.action &= + ~MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + goto out; + } +out: + kfree(rule); +} + +static void del_hw_fte(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + trace_mlx5_fs_del_fte(fte); + WARN_ON(fte->dests_size); + dev = get_dev(&ft->node); + root = find_root(&ft->node); + if (node->active) { + err = root->cmds->delete_fte(root, ft, fte); + if (err) + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + node->active = false; + } +} + +static void del_sw_fte(struct fs_node *node) +{ + struct mlx5_flow_steering *steering = get_steering(node); + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); + + err = rhashtable_remove_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + WARN_ON(err); + ida_free(&fg->fte_allocator, fte->index - fg->start_index); + kmem_cache_free(steering->ftes_cache, fte); +} + +static void del_hw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + struct mlx5_core_dev *dev; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); + dev = get_dev(&ft->node); + trace_mlx5_fs_del_fg(fg); + + root = find_root(&ft->node); + if (fg->node.active && root->cmds->destroy_flow_group(root, ft, fg)) + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static void del_sw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_steering *steering = get_steering(node); + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + int err; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); + + rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); + if (ft->autogroup.active && + fg->max_ftes == ft->autogroup.group_size && + fg->start_index < ft->autogroup.max_fte) + ft->autogroup.num_groups--; + err = rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg); + WARN_ON(err); + kmem_cache_free(steering->fgs_cache, fg); +} + +static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) +{ + int index; + int ret; + + index = ida_alloc_max(&fg->fte_allocator, fg->max_ftes - 1, GFP_KERNEL); + if (index < 0) + return index; + + fte->index = index + fg->start_index; + ret = rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_ida_remove; + + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + return 0; + +err_ida_remove: + ida_free(&fg->fte_allocator, index); + return ret; +} + +static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct fs_fte *fte; + + fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL); + if (!fte) + return ERR_PTR(-ENOMEM); + + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); + fte->node.type = FS_TYPE_FLOW_ENTRY; + fte->action = *flow_act; + fte->flow_context = spec->flow_context; + + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); + + return fte; +} + +static void dealloc_flow_group(struct mlx5_flow_steering *steering, + struct mlx5_flow_group *fg) +{ + rhashtable_destroy(&fg->ftes_hash); + kmem_cache_free(steering->fgs_cache, fg); +} + +static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, + u8 match_criteria_enable, + const void *match_criteria, + int start_index, + int end_index) +{ + struct mlx5_flow_group *fg; + int ret; + + fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL); + if (!fg) + return ERR_PTR(-ENOMEM); + + ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); + if (ret) { + kmem_cache_free(steering->fgs_cache, fg); + return ERR_PTR(ret); + } + + ida_init(&fg->fte_allocator); + fg->mask.match_criteria_enable = match_criteria_enable; + memcpy(&fg->mask.match_criteria, match_criteria, + sizeof(fg->mask.match_criteria)); + fg->node.type = FS_TYPE_FLOW_GROUP; + fg->start_index = start_index; + fg->max_ftes = end_index - start_index + 1; + + return fg; +} + +static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + const void *match_criteria, + int start_index, + int end_index, + struct list_head *prev) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *fg; + int ret; + + fg = alloc_flow_group(steering, match_criteria_enable, match_criteria, + start_index, end_index); + if (IS_ERR(fg)) + return fg; + + /* initialize refcnt, add to parent list */ + ret = rhltable_insert(&ft->fgs_hash, + &fg->hash, + rhash_fg); + if (ret) { + dealloc_flow_group(steering, fg); + return ERR_PTR(ret); + } + + tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, prev); + atomic_inc(&ft->node.version); + + return fg; +} + +static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, + enum fs_flow_table_type table_type, + enum fs_flow_table_op_mod op_mod, + u32 flags) +{ + struct mlx5_flow_table *ft; + int ret; + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + if (!ft) + return ERR_PTR(-ENOMEM); + + ret = rhltable_init(&ft->fgs_hash, &rhash_fg); + if (ret) { + kfree(ft); + return ERR_PTR(ret); + } + + ft->level = level; + ft->node.type = FS_TYPE_FLOW_TABLE; + ft->op_mod = op_mod; + ft->type = table_type; + ft->vport = vport; + ft->flags = flags; + INIT_LIST_HEAD(&ft->fwd_rules); + mutex_init(&ft->lock); + + return ft; +} + +/* If reverse is false, then we search for the first flow table in the + * root sub-tree from start(closest from right), else we search for the + * last flow table in the root sub-tree till start(closest from left). + */ +static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, + struct list_head *start, + bool reverse) +{ +#define list_advance_entry(pos, reverse) \ + ((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list)) + +#define list_for_each_advance_continue(pos, head, reverse) \ + for (pos = list_advance_entry(pos, reverse); \ + &pos->list != (head); \ + pos = list_advance_entry(pos, reverse)) + + struct fs_node *iter = list_entry(start, struct fs_node, list); + struct mlx5_flow_table *ft = NULL; + + if (!root) + return NULL; + + list_for_each_advance_continue(iter, &root->children, reverse) { + if (iter->type == FS_TYPE_FLOW_TABLE) { + fs_get_obj(ft, iter); + return ft; + } + ft = find_closest_ft_recursive(iter, &iter->children, reverse); + if (ft) + return ft; + } + + return ft; +} + +static struct fs_node *find_prio_chains_parent(struct fs_node *parent, + struct fs_node **child) +{ + struct fs_node *node = NULL; + + while (parent && parent->type != FS_TYPE_PRIO_CHAINS) { + node = parent; + parent = parent->parent; + } + + if (child) + *child = node; + + return parent; +} + +/* If reverse is false then return the first flow table next to the passed node + * in the tree, else return the last flow table before the node in the tree. + * If skip is true, skip the flow tables in the same prio_chains prio. + */ +static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse, + bool skip) +{ + struct fs_node *prio_chains_parent = NULL; + struct mlx5_flow_table *ft = NULL; + struct fs_node *curr_node; + struct fs_node *parent; + + if (skip) + prio_chains_parent = find_prio_chains_parent(node, NULL); + parent = node->parent; + curr_node = node; + while (!ft && parent) { + if (parent != prio_chains_parent) + ft = find_closest_ft_recursive(parent, &curr_node->list, + reverse); + curr_node = parent; + parent = curr_node->parent; + } + return ft; +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node) +{ + return find_closest_ft(node, false, true); +} + +/* Assuming all the tree is locked by mutex chain lock */ +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node) +{ + return find_closest_ft(node, true, true); +} + +static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, + struct mlx5_flow_act *flow_act) +{ + struct fs_prio *prio; + bool next_ns; + + next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; + fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent); + + return find_next_chained_ft(&prio->node); +} + +static int connect_fts_in_prio(struct mlx5_core_dev *dev, + struct fs_prio *prio, + struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_flow_table *iter; + int err; + + fs_for_each_ft(iter, prio) { + err = root->cmds->modify_flow_table(root, iter, ft); + if (err) { + mlx5_core_err(dev, + "Failed to modify flow table id %d, type %d, err %d\n", + iter->id, iter->type, err); + /* The driver is out of sync with the FW */ + return err; + } + } + return 0; +} + +static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node, + struct fs_node *parent, + struct fs_node **child, + bool reverse) +{ + struct mlx5_flow_table *ft; + + ft = find_closest_ft(node, reverse, false); + + if (ft && parent == find_prio_chains_parent(&ft->node, child)) + return ft; + + return NULL; +} + +/* Connect flow tables from previous priority of prio to ft */ +static int connect_prev_fts(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct fs_node *prio_parent, *parent = NULL, *child, *node; + struct mlx5_flow_table *prev_ft; + int err = 0; + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + /* return directly if not under the first sub ns of prio_chains prio */ + if (prio_parent && !list_is_first(&child->list, &prio_parent->children)) + return 0; + + prev_ft = find_prev_chained_ft(&prio->node); + while (prev_ft) { + struct fs_prio *prev_prio; + + fs_get_obj(prev_prio, prev_ft->node.parent); + err = connect_fts_in_prio(dev, prev_prio, ft); + if (err) + break; + + if (!parent) { + parent = find_prio_chains_parent(&prev_prio->node, &child); + if (!parent) + break; + } + + node = child; + prev_ft = find_closet_ft_prio_chains(node, parent, &child, true); + } + return err; +} + +static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio + *prio) +{ + struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_ft_underlay_qp *uqp; + int min_level = INT_MAX; + int err = 0; + u32 qpn; + + if (root->root_ft) + min_level = root->root_ft->level; + + if (ft->level >= min_level) + return 0; + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = root->cmds->update_root_ft(root, ft, qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = root->cmds->update_root_ft(root, ft, + qpn, false); + if (err) + break; + } + } + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = ft; + + return err; +} + +static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + int err = 0; + + fs_get_obj(fte, rule->node.parent); + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return -EINVAL; + down_write_ref_node(&fte->node, false); + fs_get_obj(fg, fte->node.parent); + fs_get_obj(ft, fg->node.parent); + + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + root = find_root(&ft->node); + err = root->cmds->update_fte(root, ft, fg, + modify_mask, fte); + up_write_ref_node(&fte->node, false); + + return err; +} + +int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest) +{ + int i; + + if (!old_dest) { + if (handle->num_rules != 1) + return -EINVAL; + return _mlx5_modify_rule_destination(handle->rule[0], + new_dest); + } + + for (i = 0; i < handle->num_rules; i++) { + if (mlx5_flow_dests_cmp(old_dest, &handle->rule[i]->dest_attr)) + return _mlx5_modify_rule_destination(handle->rule[i], + new_dest); + } + + return -EINVAL; +} + +/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ +static int connect_fwd_rules(struct mlx5_core_dev *dev, + struct mlx5_flow_table *new_next_ft, + struct mlx5_flow_table *old_next_ft) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_rule *iter; + int err = 0; + + /* new_next_ft and old_next_ft could be NULL only + * when we create/destroy the anchor flow table. + */ + if (!new_next_ft || !old_next_ft) + return 0; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = new_next_ft; + + mutex_lock(&old_next_ft->lock); + list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); + mutex_unlock(&old_next_ft->lock); + list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { + if ((iter->sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) && + iter->ft->ns == new_next_ft->ns) + continue; + + err = _mlx5_modify_rule_destination(iter, &dest); + if (err) + pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", + new_next_ft->id); + } + return 0; +} + +static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct mlx5_flow_table *next_ft, *first_ft; + int err = 0; + + /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ + + first_ft = list_first_entry_or_null(&prio->node.children, + struct mlx5_flow_table, node.list); + if (!first_ft || first_ft->level > ft->level) { + err = connect_prev_fts(dev, ft, prio); + if (err) + return err; + + next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node); + err = connect_fwd_rules(dev, ft, next_ft); + if (err) + return err; + } + + if (MLX5_CAP_FLOWTABLE(dev, + flow_table_properties_nic_receive.modify_root)) + err = update_root_ft_create(ft, prio); + return err; +} + +static void list_add_flow_table(struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct list_head *prev = &prio->node.children; + struct mlx5_flow_table *iter; + + fs_for_each_ft(iter, prio) { + if (iter->level > ft->level) + break; + prev = &iter->node.list; + } + list_add(&ft->node.list, prev); +} + +static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr, + enum fs_flow_table_op_mod op_mod, + u16 vport) +{ + struct mlx5_flow_root_namespace *root = find_root(&ns->node); + bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED; + struct mlx5_flow_table *next_ft; + struct fs_prio *fs_prio = NULL; + struct mlx5_flow_table *ft; + int err; + + if (!root) { + pr_err("mlx5: flow steering failed to find root of namespace\n"); + return ERR_PTR(-ENODEV); + } + + mutex_lock(&root->chain_lock); + fs_prio = find_prio(ns, ft_attr->prio); + if (!fs_prio) { + err = -EINVAL; + goto unlock_root; + } + if (!unmanaged) { + /* The level is related to the + * priority level range. + */ + if (ft_attr->level >= fs_prio->num_levels) { + err = -ENOSPC; + goto unlock_root; + } + + ft_attr->level += fs_prio->start_level; + } + + /* The level is related to the + * priority level range. + */ + ft = alloc_flow_table(ft_attr->level, + vport, + root->table_type, + op_mod, ft_attr->flags); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto unlock_root; + } + + tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); + next_ft = unmanaged ? ft_attr->next_ft : + find_next_chained_ft(&fs_prio->node); + ft->def_miss_action = ns->def_miss_action; + ft->ns = ns; + err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft); + if (err) + goto free_ft; + + if (!unmanaged) { + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + } + + ft->node.active = true; + down_write_ref_node(&fs_prio->node, false); + if (!unmanaged) { + tree_add_node(&ft->node, &fs_prio->node); + list_add_flow_table(ft, fs_prio); + } else { + ft->node.root = fs_prio->node.root; + } + fs_prio->num_ft++; + up_write_ref_node(&fs_prio->node, false); + mutex_unlock(&root->chain_lock); + trace_mlx5_fs_add_ft(ft); + return ft; +destroy_ft: + root->cmds->destroy_flow_table(root, ft); +free_ft: + rhltable_destroy(&ft->fgs_hash); + kfree(ft); +unlock_root: + mutex_unlock(&root->chain_lock); + return ERR_PTR(err); +} + +struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr) +{ + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0); +} +EXPORT_SYMBOL(mlx5_create_flow_table); + +u32 mlx5_flow_table_id(struct mlx5_flow_table *ft) +{ + return ft->id; +} +EXPORT_SYMBOL(mlx5_flow_table_id); + +struct mlx5_flow_table * +mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr, u16 vport) +{ + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, vport); +} + +struct mlx5_flow_table* +mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, + int prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.level = level; + ft_attr.prio = prio; + ft_attr.max_fte = 1; + + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); +} +EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); + +#define MAX_FLOW_GROUP_SIZE BIT(24) +struct mlx5_flow_table* +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr) +{ + int num_reserved_entries = ft_attr->autogroup.num_reserved_entries; + int max_num_groups = ft_attr->autogroup.max_num_groups; + struct mlx5_flow_table *ft; + int autogroups_max_fte; + + ft = mlx5_create_flow_table(ns, ft_attr); + if (IS_ERR(ft)) + return ft; + + autogroups_max_fte = ft->max_fte - num_reserved_entries; + if (max_num_groups > autogroups_max_fte) + goto err_validate; + if (num_reserved_entries > ft->max_fte) + goto err_validate; + + /* Align the number of groups according to the largest group size */ + if (autogroups_max_fte / (max_num_groups + 1) > MAX_FLOW_GROUP_SIZE) + max_num_groups = (autogroups_max_fte / MAX_FLOW_GROUP_SIZE) - 1; + + ft->autogroup.active = true; + ft->autogroup.required_groups = max_num_groups; + ft->autogroup.max_fte = autogroups_max_fte; + /* We save place for flow groups in addition to max types */ + ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1); + + return ft; + +err_validate: + mlx5_destroy_flow_table(ft); + return ERR_PTR(-ENOSPC); +} +EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); + +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *fg_in) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + fg_in, match_criteria); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + fg_in, + match_criteria_enable); + int start_index = MLX5_GET(create_flow_group_in, fg_in, + start_flow_index); + int end_index = MLX5_GET(create_flow_group_in, fg_in, + end_flow_index); + struct mlx5_flow_group *fg; + int err; + + if (ft->autogroup.active && start_index < ft->autogroup.max_fte) + return ERR_PTR(-EPERM); + + down_write_ref_node(&ft->node, false); + fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, + start_index, end_index, + ft->node.children.prev); + up_write_ref_node(&ft->node, false); + if (IS_ERR(fg)) + return fg; + + err = root->cmds->create_flow_group(root, ft, fg_in, fg); + if (err) { + tree_put_node(&fg->node, false); + return ERR_PTR(err); + } + trace_mlx5_fs_add_fg(fg); + fg->node.active = true; + + return fg; +} +EXPORT_SYMBOL(mlx5_create_flow_group); + +static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + INIT_LIST_HEAD(&rule->next_ft); + rule->node.type = FS_TYPE_FLOW_DEST; + if (dest) + memcpy(&rule->dest_attr, dest, sizeof(*dest)); + else + rule->dest_attr.type = MLX5_FLOW_DESTINATION_TYPE_NONE; + + return rule; +} + +static struct mlx5_flow_handle *alloc_handle(int num_rules) +{ + struct mlx5_flow_handle *handle; + + handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL); + if (!handle) + return NULL; + + handle->num_rules = num_rules; + + return handle; +} + +static void destroy_flow_handle(struct fs_fte *fte, + struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *dest, + int i) +{ + for (; --i >= 0;) { + if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) { + fte->dests_size--; + list_del(&handle->rule[i]->node.list); + kfree(handle->rule[i]); + } + } + kfree(handle); +} + +static struct mlx5_flow_handle * +create_flow_handle(struct fs_fte *fte, + struct mlx5_flow_destination *dest, + int dest_num, + int *modify_mask, + bool *new_rule) +{ + struct mlx5_flow_handle *handle; + struct mlx5_flow_rule *rule = NULL; + static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + int type; + int i = 0; + + handle = alloc_handle((dest_num) ? dest_num : 1); + if (!handle) + return ERR_PTR(-ENOMEM); + + do { + if (dest) { + rule = find_flow_rule(fte, dest + i); + if (rule) { + refcount_inc(&rule->node.refcount); + goto rule_found; + } + } + + *new_rule = true; + rule = alloc_rule(dest + i); + if (!rule) + goto free_rules; + + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ + tree_init_node(&rule->node, NULL, del_sw_hw_rule); + if (dest && + dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); + if (dest) { + fte->dests_size++; + + if (is_fwd_dest_type(dest[i].type)) + fte->fwd_dests++; + + type = dest[i].type == + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + *modify_mask |= type ? count : dst; + } +rule_found: + handle->rule[i] = rule; + } while (++i < dest_num); + + return handle; + +free_rules: + destroy_flow_handle(fte, handle, dest, i); + return ERR_PTR(-ENOMEM); +} + +/* fte should not be deleted while calling this function */ +static struct mlx5_flow_handle * +add_rule_fte(struct fs_fte *fte, + struct mlx5_flow_group *fg, + struct mlx5_flow_destination *dest, + int dest_num, + bool update_action) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_handle *handle; + struct mlx5_flow_table *ft; + int modify_mask = 0; + int err; + bool new_rule = false; + + handle = create_flow_handle(fte, dest, dest_num, &modify_mask, + &new_rule); + if (IS_ERR(handle) || !new_rule) + goto out; + + if (update_action) + modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + + fs_get_obj(ft, fg->node.parent); + root = find_root(&fg->node); + if (!(fte->status & FS_FTE_STATUS_EXISTING)) + err = root->cmds->create_fte(root, ft, fg, fte); + else + err = root->cmds->update_fte(root, ft, fg, modify_mask, fte); + if (err) + goto free_handle; + + fte->node.active = true; + fte->status |= FS_FTE_STATUS_EXISTING; + atomic_inc(&fg->node.version); + +out: + return handle; + +free_handle: + destroy_flow_handle(fte, handle, dest, handle->num_rules); + return ERR_PTR(err); +} + +static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec) +{ + struct list_head *prev = &ft->node.children; + u32 max_fte = ft->autogroup.max_fte; + unsigned int candidate_index = 0; + unsigned int group_size = 0; + struct mlx5_flow_group *fg; + + if (!ft->autogroup.active) + return ERR_PTR(-ENOENT); + + if (ft->autogroup.num_groups < ft->autogroup.required_groups) + group_size = ft->autogroup.group_size; + + /* max_fte == ft->autogroup.max_types */ + if (group_size == 0) + group_size = 1; + + /* sorted by start_index */ + fs_for_each_fg(fg, ft) { + if (candidate_index + group_size > fg->start_index) + candidate_index = fg->start_index + fg->max_ftes; + else + break; + prev = &fg->node.list; + } + + if (candidate_index + group_size > max_fte) + return ERR_PTR(-ENOSPC); + + fg = alloc_insert_flow_group(ft, + spec->match_criteria_enable, + spec->match_criteria, + candidate_index, + candidate_index + group_size - 1, + prev); + if (IS_ERR(fg)) + goto out; + + if (group_size == ft->autogroup.group_size) + ft->autogroup.num_groups++; + +out: + return fg; +} + +static int create_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *match_criteria_addr; + u8 src_esw_owner_mask_on; + void *misc; + int err; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + fg->mask.match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index + + fg->max_ftes - 1); + + misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria, + misc_parameters); + src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, in, + source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on); + + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + memcpy(match_criteria_addr, fg->mask.match_criteria, + sizeof(fg->mask.match_criteria)); + + err = root->cmds->create_flow_group(root, ft, in, fg); + if (!err) { + fg->node.active = true; + trace_mlx5_fs_add_fg(fg); + } + + kvfree(in); + return err; +} + +static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2) +{ + if (d1->type == d2->type) { + if (((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + d1->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && + d1->vport.num == d2->vport.num && + d1->vport.flags == d2->vport.flags && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? + (d1->vport.vhca_id == d2->vport.vhca_id) : true) && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? + (d1->vport.pkt_reformat->id == + d2->vport.pkt_reformat->id) : true)) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + d1->ft == d2->ft) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && + d1->tir_num == d2->tir_num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM && + d1->ft_num == d2->ft_num) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER && + d1->sampler_id == d2->sampler_id) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_RANGE && + d1->range.field == d2->range.field && + d1->range.hit_ft == d2->range.hit_ft && + d1->range.miss_ft == d2->range.miss_ft && + d1->range.min == d2->range.min && + d1->range.max == d2->range.max)) + return true; + } + + return false; +} + +static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *rule; + + list_for_each_entry(rule, &fte->node.children, node.list) { + if (mlx5_flow_dests_cmp(&rule->dest_attr, dest)) + return rule; + } + return NULL; +} + +static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0, + const struct mlx5_fs_vlan *vlan1) +{ + return vlan0->ethtype != vlan1->ethtype || + vlan0->vid != vlan1->vid || + vlan0->prio != vlan1->prio; +} + +static bool check_conflicting_actions(const struct mlx5_flow_act *act1, + const struct mlx5_flow_act *act2) +{ + u32 action1 = act1->action; + u32 action2 = act2->action; + u32 xored_actions; + + xored_actions = action1 ^ action2; + + /* if one rule only wants to count, it's ok */ + if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || + action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT) + return false; + + if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 | + MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT && + act1->pkt_reformat != act2->pkt_reformat) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + act1->modify_hdr != act2->modify_hdr) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH && + check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0])) + return true; + + if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 && + check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1])) + return true; + + return false; +} + +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) +{ + if (check_conflicting_actions(flow_act, &fte->action)) { + mlx5_core_warn(get_dev(&fte->node), + "Found two FTEs with conflicting actions\n"); + return -EEXIST; + } + + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + fte->flow_context.flow_tag, + flow_context->flow_tag); + return -EEXIST; + } + + return 0; +} + +static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num, + struct fs_fte *fte) +{ + struct mlx5_flow_handle *handle; + int old_action; + int i; + int ret; + + ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); + if (ret) + return ERR_PTR(ret); + + old_action = fte->action.action; + fte->action.action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); + if (IS_ERR(handle)) { + fte->action.action = old_action; + return handle; + } + trace_mlx5_fs_set_fte(fte, false); + + for (i = 0; i < handle->num_rules; i++) { + if (refcount_read(&handle->rule[i]->node.refcount) == 1) { + tree_add_node(&handle->rule[i]->node, &fte->node); + trace_mlx5_fs_add_rule(handle->rule[i]); + } + } + return handle; +} + +static bool counter_is_valid(u32 action) +{ + return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ALLOW | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); +} + +static bool dest_is_valid(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_table *ft) +{ + bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL; + u32 action = flow_act->action; + + if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) + return counter_is_valid(action); + + if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return true; + + if (ignore_level) { + if (ft->type != FS_FT_FDB && + ft->type != FS_FT_NIC_RX && + ft->type != FS_FT_NIC_TX) + return false; + + if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + ft->type != dest->ft->type) + return false; + } + + if (!dest || ((dest->type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && + (dest->ft->level <= ft->level && !ignore_level))) + return false; + return true; +} + +struct match_list { + struct list_head list; + struct mlx5_flow_group *g; +}; + +static void free_match_list(struct match_list *head, bool ft_locked) +{ + struct match_list *iter, *match_tmp; + + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + tree_put_node(&iter->g->node, ft_locked); + list_del(&iter->list); + kfree(iter); + } +} + +static int build_match_list(struct match_list *match_head, + struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_group *fg, + bool ft_locked) +{ + struct rhlist_head *tmp, *list; + struct mlx5_flow_group *g; + + rcu_read_lock(); + INIT_LIST_HEAD(&match_head->list); + /* Collect all fgs which has a matching match_criteria */ + list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + /* RCU is atomic, we can't execute FW commands here */ + rhl_for_each_entry_rcu(g, tmp, list, hash) { + struct match_list *curr_match; + + if (fg && fg != g) + continue; + + if (unlikely(!tree_get_node(&g->node))) + continue; + + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + if (!curr_match) { + rcu_read_unlock(); + free_match_list(match_head, ft_locked); + return -ENOMEM; + } + curr_match->g = g; + list_add_tail(&curr_match->list, &match_head->list); + } + rcu_read_unlock(); + return 0; +} + +static u64 matched_fgs_get_version(struct list_head *match_head) +{ + struct match_list *iter; + u64 version = 0; + + list_for_each_entry(iter, match_head, list) + version += (u64)atomic_read(&iter->g->node.version); + return version; +} + +static struct fs_fte * +lookup_fte_locked(struct mlx5_flow_group *g, + const u32 *match_value, + bool take_write) +{ + struct fs_fte *fte_tmp; + + if (take_write) + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + else + nested_down_read_ref_node(&g->node, FS_LOCK_PARENT); + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, + rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) { + fte_tmp = NULL; + goto out; + } + if (!fte_tmp->node.active) { + tree_put_node(&fte_tmp->node, false); + fte_tmp = NULL; + goto out; + } + + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); +out: + if (take_write) + up_write_ref_node(&g->node, false); + else + up_read_ref_node(&g->node); + return fte_tmp; +} + +static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct list_head *match_head, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num, + int ft_version) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; + struct match_list *iter; + bool take_write = false; + struct fs_fte *fte; + u64 version = 0; + int err; + + fte = alloc_fte(ft, spec, flow_act); + if (IS_ERR(fte)) + return ERR_PTR(-ENOMEM); + +search_again_locked: + if (flow_act->flags & FLOW_ACT_NO_APPEND) + goto skip_search; + version = matched_fgs_get_version(match_head); + /* Try to find an fte with identical match value and attempt update its + * action. + */ + list_for_each_entry(iter, match_head, list) { + struct fs_fte *fte_tmp; + + g = iter->g; + fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); + if (!fte_tmp) + continue; + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); + /* No error check needed here, because insert_fte() is not called */ + up_write_ref_node(&fte_tmp->node, false); + tree_put_node(&fte_tmp->node, false); + kmem_cache_free(steering->ftes_cache, fte); + return rule; + } + +skip_search: + /* No group with matching fte found, or we skipped the search. + * Try to add a new fte to any matching fg. + */ + + /* Check the ft version, for case that new flow group + * was added while the fgs weren't locked + */ + if (atomic_read(&ft->node.version) != ft_version) { + rule = ERR_PTR(-EAGAIN); + goto out; + } + + /* Check the fgs version. If version have changed it could be that an + * FTE with the same match value was added while the fgs weren't + * locked. + */ + if (!(flow_act->flags & FLOW_ACT_NO_APPEND) && + version != matched_fgs_get_version(match_head)) { + take_write = true; + goto search_again_locked; + } + + list_for_each_entry(iter, match_head, list) { + g = iter->g; + + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + + if (!g->node.active) { + up_write_ref_node(&g->node, false); + continue; + } + + err = insert_fte(g, fte); + if (err) { + up_write_ref_node(&g->node, false); + if (err == -ENOSPC) + continue; + kmem_cache_free(steering->ftes_cache, fte); + return ERR_PTR(err); + } + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node, false); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); + return rule; + } + rule = ERR_PTR(-ENOENT); +out: + kmem_cache_free(steering->ftes_cache, fte); + return rule; +} + +static struct mlx5_flow_handle * +_mlx5_add_flow_rules(struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) + +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_handle *rule; + struct match_list match_head; + struct mlx5_flow_group *g; + bool take_write = false; + struct fs_fte *fte; + int version; + int err; + int i; + + if (!check_valid_spec(spec)) + return ERR_PTR(-EINVAL); + + if (flow_act->fg && ft->autogroup.active) + return ERR_PTR(-EINVAL); + + if (dest && dest_num <= 0) + return ERR_PTR(-EINVAL); + + for (i = 0; i < dest_num; i++) { + if (!dest_is_valid(&dest[i], flow_act, ft)) + return ERR_PTR(-EINVAL); + } + nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); +search_again_locked: + version = atomic_read(&ft->node.version); + + /* Collect all fgs which has a matching match_criteria */ + err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write); + if (err) { + if (take_write) + up_write_ref_node(&ft->node, false); + else + up_read_ref_node(&ft->node); + return ERR_PTR(err); + } + + if (!take_write) + up_read_ref_node(&ft->node); + + rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, + dest_num, version); + free_match_list(&match_head, take_write); + if (!IS_ERR(rule) || + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { + if (take_write) + up_write_ref_node(&ft->node, false); + return rule; + } + + if (!take_write) { + nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); + take_write = true; + } + + if (PTR_ERR(rule) == -EAGAIN || + version != atomic_read(&ft->node.version)) + goto search_again_locked; + + g = alloc_auto_flow_group(ft, spec); + if (IS_ERR(g)) { + rule = ERR_CAST(g); + up_write_ref_node(&ft->node, false); + return rule; + } + + fte = alloc_fte(ft, spec, flow_act); + if (IS_ERR(fte)) { + up_write_ref_node(&ft->node, false); + err = PTR_ERR(fte); + goto err_alloc_fte; + } + + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + up_write_ref_node(&ft->node, false); + + err = create_auto_flow_group(ft, g); + if (err) + goto err_release_fg; + + err = insert_fte(g, fte); + if (err) + goto err_release_fg; + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node, false); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); + tree_put_node(&g->node, false); + return rule; + +err_release_fg: + up_write_ref_node(&g->node, false); + kmem_cache_free(steering->ftes_cache, fte); +err_alloc_fte: + tree_put_node(&g->node, false); + return ERR_PTR(err); +} + +static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) +{ + return ((ft->type == FS_FT_NIC_RX) && + (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); +} + +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + const struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + static const struct mlx5_flow_spec zero_spec = {}; + struct mlx5_flow_destination *gen_dest = NULL; + struct mlx5_flow_table *next_ft = NULL; + struct mlx5_flow_handle *handle = NULL; + u32 sw_action = flow_act->action; + int i; + + if (!spec) + spec = &zero_spec; + + if (!is_fwd_next_action(sw_action)) + return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest); + + if (!fwd_next_prio_supported(ft)) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&root->chain_lock); + next_ft = find_next_fwd_ft(ft, flow_act); + if (!next_ft) { + handle = ERR_PTR(-EOPNOTSUPP); + goto unlock; + } + + gen_dest = kcalloc(num_dest + 1, sizeof(*dest), + GFP_KERNEL); + if (!gen_dest) { + handle = ERR_PTR(-ENOMEM); + goto unlock; + } + for (i = 0; i < num_dest; i++) + gen_dest[i] = dest[i]; + gen_dest[i].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + gen_dest[i].ft = next_ft; + dest = gen_dest; + num_dest++; + flow_act->action &= ~(MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO | + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS); + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest); + if (IS_ERR(handle)) + goto unlock; + + if (list_empty(&handle->rule[num_dest - 1]->next_ft)) { + mutex_lock(&next_ft->lock); + list_add(&handle->rule[num_dest - 1]->next_ft, + &next_ft->fwd_rules); + mutex_unlock(&next_ft->lock); + handle->rule[num_dest - 1]->sw_action = sw_action; + handle->rule[num_dest - 1]->ft = ft; + } +unlock: + mutex_unlock(&root->chain_lock); + kfree(gen_dest); + return handle; +} +EXPORT_SYMBOL(mlx5_add_flow_rules); + +void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) +{ + struct fs_fte *fte; + int i; + + /* In order to consolidate the HW changes we lock the FTE for other + * changes, and increase its refcount, in order not to perform the + * "del" functions of the FTE. Will handle them here. + * The removal of the rules is done under locked FTE. + * After removing all the handle's rules, if there are remaining + * rules, it means we just need to modify the FTE in FW, and + * unlock/decrease the refcount we increased before. + * Otherwise, it means the FTE should be deleted. First delete the + * FTE in FW. Then, unlock the FTE, and proceed the tree_put_node of + * the FTE, which will handle the last decrease of the refcount, as + * well as required handling of its parent. + */ + fs_get_obj(fte, handle->rule[0]->node.parent); + down_write_ref_node(&fte->node, false); + for (i = handle->num_rules - 1; i >= 0; i--) + tree_remove_node(&handle->rule[i]->node, true); + if (list_empty(&fte->node.children)) { + fte->node.del_hw_func(&fte->node); + /* Avoid double call to del_hw_fte */ + fte->node.del_hw_func = NULL; + up_write_ref_node(&fte->node, false); + tree_put_node(&fte->node, false); + } else if (fte->dests_size) { + if (fte->modify_mask) + modify_fte(fte); + up_write_ref_node(&fte->node, false); + } else { + up_write_ref_node(&fte->node, false); + } + kfree(handle); +} +EXPORT_SYMBOL(mlx5_del_flow_rules); + +/* Assuming prio->node.children(flow tables) is sorted by level */ +static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) +{ + struct fs_node *prio_parent, *child; + struct fs_prio *prio; + + fs_get_obj(prio, ft->node.parent); + + if (!list_is_last(&ft->node.list, &prio->node.children)) + return list_next_entry(ft, node.list); + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + if (prio_parent && list_is_first(&child->list, &prio_parent->children)) + return find_closest_ft(&prio->node, false, false); + + return find_next_chained_ft(&prio->node); +} + +static int update_root_ft_destroy(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_ft_underlay_qp *uqp; + struct mlx5_flow_table *new_root_ft = NULL; + int err = 0; + u32 qpn; + + if (root->root_ft != ft) + return 0; + + new_root_ft = find_next_ft(ft); + if (!new_root_ft) { + root->root_ft = NULL; + return 0; + } + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = root->cmds->update_root_ft(root, new_root_ft, + qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = root->cmds->update_root_ft(root, + new_root_ft, qpn, + false); + if (err) + break; + } + } + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = new_root_ft; + + return 0; +} + +/* Connect flow table from previous priority to + * the next flow table. + */ +static int disconnect_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + struct mlx5_flow_table *next_ft; + struct fs_prio *prio; + int err = 0; + + err = update_root_ft_destroy(ft); + if (err) + return err; + + fs_get_obj(prio, ft->node.parent); + if (!(list_first_entry(&prio->node.children, + struct mlx5_flow_table, + node.list) == ft)) + return 0; + + next_ft = find_next_ft(ft); + err = connect_fwd_rules(dev, next_ft, ft); + if (err) + return err; + + err = connect_prev_fts(dev, next_ft, prio); + if (err) + mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", + ft->id); + return err; +} + +int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) +{ + struct mlx5_flow_root_namespace *root = find_root(&ft->node); + int err = 0; + + mutex_lock(&root->chain_lock); + if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED)) + err = disconnect_flow_table(ft); + if (err) { + mutex_unlock(&root->chain_lock); + return err; + } + if (tree_remove_node(&ft->node, false)) + mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", + ft->id); + mutex_unlock(&root->chain_lock); + + return err; +} +EXPORT_SYMBOL(mlx5_destroy_flow_table); + +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) +{ + if (tree_remove_node(&fg->node, false)) + mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", + fg->id); +} +EXPORT_SYMBOL(mlx5_destroy_flow_group); + +struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, + int n) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering || !steering->fdb_sub_ns) + return NULL; + + return steering->fdb_sub_ns[n]; +} +EXPORT_SYMBOL(mlx5_get_fdb_sub_ns); + +static bool is_nic_rx_ns(enum mlx5_flow_namespace_type type) +{ + switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC: + case MLX5_FLOW_NAMESPACE_LAG: + case MLX5_FLOW_NAMESPACE_OFFLOADS: + case MLX5_FLOW_NAMESPACE_ETHTOOL: + case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: + return true; + default: + return false; + } +} + +struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_flow_root_namespace *root_ns; + int prio = 0; + struct fs_prio *fs_prio; + struct mlx5_flow_namespace *ns; + + if (!steering) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_FDB: + if (steering->fdb_root_ns) + return &steering->fdb_root_ns->ns; + return NULL; + case MLX5_FLOW_NAMESPACE_PORT_SEL: + if (steering->port_sel_root_ns) + return &steering->port_sel_root_ns->ns; + return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_RX: + if (steering->sniffer_rx_root_ns) + return &steering->sniffer_rx_root_ns->ns; + return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_TX: + if (steering->sniffer_tx_root_ns) + return &steering->sniffer_tx_root_ns->ns; + return NULL; + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: + root_ns = steering->fdb_root_ns; + prio = FDB_BYPASS_PATH; + break; + case MLX5_FLOW_NAMESPACE_EGRESS: + case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC: + case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC: + root_ns = steering->egress_root_ns; + prio = type - MLX5_FLOW_NAMESPACE_EGRESS; + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_BYPASS_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_KERNEL_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: + root_ns = steering->rdma_tx_root_ns; + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_COUNTERS_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS: + root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_COUNTERS_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_IPSEC_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC: + root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_IPSEC_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_MACSEC_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC: + root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_MACSEC_PRIO; + break; + default: /* Must be NIC RX */ + WARN_ON(!is_nic_rx_ns(type)); + root_ns = steering->root_ns; + prio = type; + break; + } + + if (!root_ns) + return NULL; + + fs_prio = find_prio(&root_ns->ns, prio); + if (!fs_prio) + return NULL; + + ns = list_first_entry(&fs_prio->node.children, + typeof(*ns), + node.list); + + return ns; +} +EXPORT_SYMBOL(mlx5_get_flow_namespace); + +struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type, + int vport) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering) + return NULL; + + switch (type) { + case MLX5_FLOW_NAMESPACE_ESW_EGRESS: + if (vport >= steering->esw_egress_acl_vports) + return NULL; + if (steering->esw_egress_root_ns && + steering->esw_egress_root_ns[vport]) + return &steering->esw_egress_root_ns[vport]->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + if (vport >= steering->esw_ingress_acl_vports) + return NULL; + if (steering->esw_ingress_root_ns && + steering->esw_ingress_root_ns[vport]) + return &steering->esw_ingress_root_ns[vport]->ns; + else + return NULL; + default: + return NULL; + } +} + +static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels, + enum fs_node_type type) +{ + struct fs_prio *fs_prio; + + fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL); + if (!fs_prio) + return ERR_PTR(-ENOMEM); + + fs_prio->node.type = type; + tree_init_node(&fs_prio->node, NULL, del_sw_prio); + tree_add_node(&fs_prio->node, &ns->node); + fs_prio->num_levels = num_levels; + fs_prio->prio = prio; + list_add_tail(&fs_prio->node.list, &ns->node.children); + + return fs_prio; +} + +static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS); +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO); +} + +static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace + *ns) +{ + ns->node.type = FS_TYPE_NAMESPACE; + + return ns; +} + +static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio, + int def_miss_act) +{ + struct mlx5_flow_namespace *ns; + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) + return ERR_PTR(-ENOMEM); + + fs_init_namespace(ns); + ns->def_miss_action = def_miss_act; + tree_init_node(&ns->node, NULL, del_sw_ns); + tree_add_node(&ns->node, &prio->node); + list_add_tail(&ns->node.list, &prio->node.children); + + return ns; +} + +static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, + struct init_tree_node *prio_metadata) +{ + struct fs_prio *fs_prio; + int i; + + for (i = 0; i < prio_metadata->num_leaf_prios; i++) { + fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + } + return 0; +} + +#define FLOW_TABLE_BIT_SZ 1 +#define GET_FLOW_TABLE_CAP(dev, offset) \ + ((be32_to_cpu(*((__be32 *)(dev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur) + \ + offset / 32)) >> \ + (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ) +static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) +{ + int i; + + for (i = 0; i < caps->arr_sz; i++) { + if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i])) + return false; + } + return true; +} + +static int init_root_tree_recursive(struct mlx5_flow_steering *steering, + struct init_tree_node *init_node, + struct fs_node *fs_parent_node, + struct init_tree_node *init_parent_node, + int prio) +{ + int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev, + flow_table_properties_nic_receive. + max_ft_level); + struct mlx5_flow_namespace *fs_ns; + struct fs_prio *fs_prio; + struct fs_node *base; + int i; + int err; + + if (init_node->type == FS_TYPE_PRIO) { + if ((init_node->min_ft_level > max_ft_level) || + !has_required_caps(steering->dev, &init_node->caps)) + return 0; + + fs_get_obj(fs_ns, fs_parent_node); + if (init_node->num_leaf_prios) + return create_leaf_prios(fs_ns, prio, init_node); + fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels); + if (IS_ERR(fs_prio)) + return PTR_ERR(fs_prio); + base = &fs_prio->node; + } else if (init_node->type == FS_TYPE_NAMESPACE) { + fs_get_obj(fs_prio, fs_parent_node); + fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action); + if (IS_ERR(fs_ns)) + return PTR_ERR(fs_ns); + base = &fs_ns->node; + } else { + return -EINVAL; + } + prio = 0; + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(steering, &init_node->children[i], + base, init_node, prio); + if (err) + return err; + if (init_node->children[i].type == FS_TYPE_PRIO && + init_node->children[i].num_leaf_prios) { + prio += init_node->children[i].num_leaf_prios; + } + } + + return 0; +} + +static int init_root_tree(struct mlx5_flow_steering *steering, + struct init_tree_node *init_node, + struct fs_node *fs_parent_node) +{ + int err; + int i; + + for (i = 0; i < init_node->ar_size; i++) { + err = init_root_tree_recursive(steering, &init_node->children[i], + fs_parent_node, + init_node, i); + if (err) + return err; + } + return 0; +} + +static void del_sw_root_ns(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + fs_get_obj(ns, node); + root_ns = container_of(ns, struct mlx5_flow_root_namespace, ns); + mutex_destroy(&root_ns->chain_lock); + kfree(node); +} + +static struct mlx5_flow_root_namespace +*create_root_ns(struct mlx5_flow_steering *steering, + enum fs_flow_table_type table_type) +{ + const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type); + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_namespace *ns; + + /* Create the root namespace */ + root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL); + if (!root_ns) + return NULL; + + root_ns->dev = steering->dev; + root_ns->table_type = table_type; + root_ns->cmds = cmds; + + INIT_LIST_HEAD(&root_ns->underlay_qpns); + + ns = &root_ns->ns; + fs_init_namespace(ns); + mutex_init(&root_ns->chain_lock); + tree_init_node(&ns->node, NULL, del_sw_root_ns); + tree_add_node(&ns->node, NULL); + + return root_ns; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level); + +static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) +{ + struct fs_prio *prio; + + fs_for_each_prio(prio, ns) { + /* This updates prio start_level and num_levels */ + set_prio_attrs_in_prio(prio, acc_level); + acc_level += prio->num_levels; + } + return acc_level; +} + +static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) +{ + struct mlx5_flow_namespace *ns; + int acc_level_ns = acc_level; + + prio->start_level = acc_level; + fs_for_each_ns(ns, prio) { + /* This updates start_level and num_levels of ns's priority descendants */ + acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + + /* If this a prio with chains, and we can jump from one chain + * (namespace) to another, so we accumulate the levels + */ + if (prio->node.type == FS_TYPE_PRIO_CHAINS) + acc_level = acc_level_ns; + } + + if (!prio->num_levels) + prio->num_levels = acc_level_ns - prio->start_level; + WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); +} + +static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) +{ + struct mlx5_flow_namespace *ns = &root_ns->ns; + struct fs_prio *prio; + int start_level = 0; + + fs_for_each_prio(prio, ns) { + set_prio_attrs_in_prio(prio, start_level); + start_level += prio->num_levels; + } +} + +#define ANCHOR_PRIO 0 +#define ANCHOR_SIZE 1 +#define ANCHOR_LEVEL 0 +static int create_anchor_flow_table(struct mlx5_flow_steering *steering) +{ + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft; + + ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); + if (WARN_ON(!ns)) + return -EINVAL; + + ft_attr.max_fte = ANCHOR_SIZE; + ft_attr.level = ANCHOR_LEVEL; + ft_attr.prio = ANCHOR_PRIO; + + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); + return PTR_ERR(ft); + } + return 0; +} + +static int init_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); + if (!steering->root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node); + if (err) + goto out_err; + + set_prio_attrs(steering->root_ns); + err = create_anchor_flow_table(steering); + if (err) + goto out_err; + + return 0; + +out_err: + cleanup_root_ns(steering->root_ns); + steering->root_ns = NULL; + return err; +} + +static void clean_tree(struct fs_node *node) +{ + if (node) { + struct fs_node *iter; + struct fs_node *temp; + + tree_get_node(node); + list_for_each_entry_safe(iter, temp, &node->children, list) + clean_tree(iter); + tree_put_node(node, false); + tree_remove_node(node, false); + } +} + +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) +{ + if (!root_ns) + return; + + clean_tree(&root_ns->ns.node); +} + +static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX); + if (!steering->sniffer_tx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX); + if (!steering->sniffer_rx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +#define PORT_SEL_NUM_LEVELS 3 +static int init_port_sel_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->port_sel_root_ns = create_root_ns(steering, FS_FT_PORT_SEL); + if (!steering->port_sel_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->port_sel_root_ns->ns, 0, + PORT_SEL_NUM_LEVELS); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX); + if (!steering->rdma_rx_root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &rdma_rx_root_fs, + &steering->rdma_rx_root_ns->ns.node); + if (err) + goto out_err; + + set_prio_attrs(steering->rdma_rx_root_ns); + + return 0; + +out_err: + cleanup_root_ns(steering->rdma_rx_root_ns); + steering->rdma_rx_root_ns = NULL; + return err; +} + +static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->rdma_tx_root_ns = create_root_ns(steering, FS_FT_RDMA_TX); + if (!steering->rdma_tx_root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &rdma_tx_root_fs, + &steering->rdma_tx_root_ns->ns.node); + if (err) + goto out_err; + + set_prio_attrs(steering->rdma_tx_root_ns); + + return 0; + +out_err: + cleanup_root_ns(steering->rdma_tx_root_ns); + steering->rdma_tx_root_ns = NULL; + return err; +} + +/* FT and tc chains are stored in the same array so we can re-use the + * mlx5_get_fdb_sub_ns() and tc api for FT chains. + * When creating a new ns for each chain store it in the first available slot. + * Assume tc chains are created and stored first and only then the FT chain. + */ +static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct mlx5_flow_namespace *ns) +{ + int chain = 0; + + while (steering->fdb_sub_ns[chain]) + ++chain; + + steering->fdb_sub_ns[chain] = ns; +} + +static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct fs_prio *maj_prio) +{ + struct mlx5_flow_namespace *ns; + struct fs_prio *min_prio; + int prio; + + ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) { + min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO); + if (IS_ERR(min_prio)) + return PTR_ERR(min_prio); + } + + store_fdb_sub_ns_prio_chain(steering, ns); + + return 0; +} + +static int create_fdb_chains(struct mlx5_flow_steering *steering, + int fs_prio, + int chains) +{ + struct fs_prio *maj_prio; + int levels; + int chain; + int err; + + levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains; + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, + fs_prio, + levels); + if (IS_ERR(maj_prio)) + return PTR_ERR(maj_prio); + + for (chain = 0; chain < chains; chain++) { + err = create_fdb_sub_ns_prio_chain(steering, maj_prio); + if (err) + return err; + } + + return 0; +} + +static int create_fdb_fast_path(struct mlx5_flow_steering *steering) +{ + int err; + + steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS, + sizeof(*steering->fdb_sub_ns), + GFP_KERNEL); + if (!steering->fdb_sub_ns) + return -ENOMEM; + + err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1); + if (err) + return err; + + err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1); + if (err) + return err; + + return 0; +} + +static int create_fdb_bypass(struct mlx5_flow_steering *steering) +{ + struct mlx5_flow_namespace *ns; + struct fs_prio *prio; + int i; + + prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 0); + if (IS_ERR(prio)) + return PTR_ERR(prio); + + ns = fs_create_namespace(prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (i = 0; i < MLX5_BY_PASS_NUM_REGULAR_PRIOS; i++) { + prio = fs_create_prio(ns, i, 1); + if (IS_ERR(prio)) + return PTR_ERR(prio); + } + return 0; +} + +static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; +} + +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *maj_prio; + int err; + + steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); + if (!steering->fdb_root_ns) + return -ENOMEM; + + err = create_fdb_bypass(steering); + if (err) + goto out_err; + + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_CRYPTO_INGRESS, 3); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + err = create_fdb_fast_path(steering); + if (err) + goto out_err; + + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_TC_MISS, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 4); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_CRYPTO_EGRESS, 3); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + /* We put this priority last, knowing that nothing will get here + * unless explicitly forwarded to. This is possible because the + * slow path tables have catch all rules and nothing gets passed + * those tables. + */ + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_PER_VPORT, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); + goto out_err; + } + + set_prio_attrs(steering->fdb_root_ns); + return 0; + +out_err: + cleanup_fdb_root_ns(steering); + return err; +} + +static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +{ + struct fs_prio *prio; + + steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL); + if (!steering->esw_egress_root_ns[vport]) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +{ + struct fs_prio *prio; + + steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL); + if (!steering->esw_ingress_root_ns[vport]) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1); + return PTR_ERR_OR_ZERO(prio); +} + +int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err; + int i; + + steering->esw_egress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_egress_root_ns), + GFP_KERNEL); + if (!steering->esw_egress_root_ns) + return -ENOMEM; + + for (i = 0; i < total_vports; i++) { + err = init_egress_acl_root_ns(steering, i); + if (err) + goto cleanup_root_ns; + } + steering->esw_egress_acl_vports = total_vports; + return 0; + +cleanup_root_ns: + for (i--; i >= 0; i--) + cleanup_root_ns(steering->esw_egress_root_ns[i]); + kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; + return err; +} + +void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_egress_root_ns) + return; + + for (i = 0; i < steering->esw_egress_acl_vports; i++) + cleanup_root_ns(steering->esw_egress_root_ns[i]); + + kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; +} + +int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err; + int i; + + steering->esw_ingress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_ingress_root_ns), + GFP_KERNEL); + if (!steering->esw_ingress_root_ns) + return -ENOMEM; + + for (i = 0; i < total_vports; i++) { + err = init_ingress_acl_root_ns(steering, i); + if (err) + goto cleanup_root_ns; + } + steering->esw_ingress_acl_vports = total_vports; + return 0; + +cleanup_root_ns: + for (i--; i >= 0; i--) + cleanup_root_ns(steering->esw_ingress_root_ns[i]); + kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; + return err; +} + +void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_ingress_root_ns) + return; + + for (i = 0; i < steering->esw_ingress_acl_vports; i++) + cleanup_root_ns(steering->esw_ingress_root_ns[i]); + + kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; +} + +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + ns = mlx5_get_flow_namespace(dev, type); + if (!ns) + return 0; + + root = find_root(&ns->node); + if (!root) + return 0; + + return root->cmds->get_capabilities(root, root->table_type); +} + +static int init_egress_root_ns(struct mlx5_flow_steering *steering) +{ + int err; + + steering->egress_root_ns = create_root_ns(steering, + FS_FT_NIC_TX); + if (!steering->egress_root_ns) + return -ENOMEM; + + err = init_root_tree(steering, &egress_root_fs, + &steering->egress_root_ns->ns.node); + if (err) + goto cleanup; + set_prio_attrs(steering->egress_root_ns); + return 0; +cleanup: + cleanup_root_ns(steering->egress_root_ns); + steering->egress_root_ns = NULL; + return err; +} + +static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char *value = val.vstr; + int err = 0; + + if (!strcmp(value, "dmfs")) { + return 0; + } else if (!strcmp(value, "smfs")) { + u8 eswitch_mode; + bool smfs_cap; + + eswitch_mode = mlx5_eswitch_mode(dev); + smfs_cap = mlx5_fs_dr_is_supported(dev); + + if (!smfs_cap) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported by current device"); + } + + else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported when eswitch offloads enabled."); + err = -EOPNOTSUPP; + } + } else { + NL_SET_ERR_MSG_MOD(extack, + "Bad parameter: supported values are [\"dmfs\", \"smfs\"]"); + err = -EINVAL; + } + + return err; +} + +static int mlx5_fs_mode_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + enum mlx5_flow_steering_mode mode; + + if (!strcmp(ctx->val.vstr, "smfs")) + mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + mode = MLX5_FLOW_STEERING_MODE_DMFS; + dev->priv.steering->mode = mode; + + return 0; +} + +static int mlx5_fs_mode_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) + strcpy(ctx->val.vstr, "smfs"); + else + strcpy(ctx->val.vstr, "dmfs"); + return 0; +} + +static const struct devlink_param mlx5_fs_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, + "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_fs_mode_get, mlx5_fs_mode_set, + mlx5_fs_mode_validate), +}; + +void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + cleanup_root_ns(steering->root_ns); + cleanup_fdb_root_ns(steering); + cleanup_root_ns(steering->port_sel_root_ns); + cleanup_root_ns(steering->sniffer_rx_root_ns); + cleanup_root_ns(steering->sniffer_tx_root_ns); + cleanup_root_ns(steering->rdma_rx_root_ns); + cleanup_root_ns(steering->rdma_tx_root_ns); + cleanup_root_ns(steering->egress_root_ns); + + devl_params_unregister(priv_to_devlink(dev), mlx5_fs_params, + ARRAY_SIZE(mlx5_fs_params)); +} + +int mlx5_fs_core_init(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int err; + + err = devl_params_register(priv_to_devlink(dev), mlx5_fs_params, + ARRAY_SIZE(mlx5_fs_params)); + if (err) + return err; + + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + (MLX5_CAP_GEN(dev, nic_flow_table))) || + ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) && + MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { + err = init_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_ESWITCH_MANAGER(dev)) { + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { + err = init_fdb_root_ns(steering); + if (err) + goto err; + } + } + + if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) { + err = init_sniffer_rx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) { + err = init_sniffer_tx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_PORT_SELECTION(dev, ft_support)) { + err = init_port_sel_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && + MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) { + err = init_rdma_rx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_RDMA_TX(dev, ft_support)) { + err = init_rdma_tx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { + err = init_egress_root_ns(steering); + if (err) + goto err; + } + + return 0; + +err: + mlx5_fs_core_cleanup(dev); + return err; +} + +void mlx5_fs_core_free(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); + kfree(steering); + mlx5_ft_pool_destroy(dev); + mlx5_cleanup_fc_stats(dev); +} + +int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering; + int err = 0; + + err = mlx5_init_fc_stats(dev); + if (err) + return err; + + err = mlx5_ft_pool_init(dev); + if (err) + goto err; + + steering = kzalloc(sizeof(*steering), GFP_KERNEL); + if (!steering) { + err = -ENOMEM; + goto err; + } + + steering->dev = dev; + dev->priv.steering = steering; + + if (mlx5_fs_dr_is_supported(dev)) + steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; + + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + + return 0; + +err: + mlx5_fs_core_free(dev); + return err; +} + +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *new_uqp; + int err = 0; + + new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL); + if (!new_uqp) + return -ENOMEM; + + mutex_lock(&root->chain_lock); + + if (!root->root_ft) { + err = -EINVAL; + goto update_ft_fail; + } + + err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn, + false); + if (err) { + mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n", + underlay_qpn, err); + goto update_ft_fail; + } + + new_uqp->qpn = underlay_qpn; + list_add_tail(&new_uqp->list, &root->underlay_qpns); + + mutex_unlock(&root->chain_lock); + + return 0; + +update_ft_fail: + mutex_unlock(&root->chain_lock); + kfree(new_uqp); + return err; +} +EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn); + +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *uqp; + bool found = false; + int err = 0; + + mutex_lock(&root->chain_lock); + list_for_each_entry(uqp, &root->underlay_qpns, list) { + if (uqp->qpn == underlay_qpn) { + found = true; + break; + } + } + + if (!found) { + mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n", + underlay_qpn); + err = -EINVAL; + goto out; + } + + err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn, + true); + if (err) + mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n", + underlay_qpn, err); + + list_del(&uqp->list); + mutex_unlock(&root->chain_lock); + kfree(uqp); + + return 0; + +out: + mutex_unlock(&root->chain_lock); + return err; +} +EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); + +static struct mlx5_flow_root_namespace +*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_flow_namespace *ns; + + if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS || + ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS) + ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0); + else + ns = mlx5_get_flow_namespace(dev, ns_type); + if (!ns) + return NULL; + + return find_root(&ns->node); +} + +struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 ns_type, u8 num_actions, + void *modify_actions) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_modify_hdr *modify_hdr; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL); + if (!modify_hdr) + return ERR_PTR(-ENOMEM); + + modify_hdr->ns_type = ns_type; + err = root->cmds->modify_header_alloc(root, ns_type, num_actions, + modify_actions, modify_hdr); + if (err) { + kfree(modify_hdr); + return ERR_PTR(err); + } + + return modify_hdr; +} +EXPORT_SYMBOL(mlx5_modify_header_alloc); + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, modify_hdr->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->modify_header_dealloc(root, modify_hdr); + kfree(modify_hdr); +} +EXPORT_SYMBOL(mlx5_modify_header_dealloc); + +struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_pkt_reformat *pkt_reformat; + struct mlx5_flow_root_namespace *root; + int err; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL); + if (!pkt_reformat) + return ERR_PTR(-ENOMEM); + + pkt_reformat->ns_type = ns_type; + pkt_reformat->reformat_type = params->type; + err = root->cmds->packet_reformat_alloc(root, params, ns_type, + pkt_reformat); + if (err) { + kfree(pkt_reformat); + return ERR_PTR(err); + } + + return pkt_reformat; +} +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); + +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, pkt_reformat->ns_type); + if (WARN_ON(!root)) + return; + root->cmds->packet_reformat_dealloc(root, pkt_reformat); + kfree(pkt_reformat); +} +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); + +int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer) +{ + return definer->id; +} + +struct mlx5_flow_definer * +mlx5_create_match_definer(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type ns_type, u16 format_id, + u32 *match_mask) +{ + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_definer *definer; + int id; + + root = get_root_namespace(dev, ns_type); + if (!root) + return ERR_PTR(-EOPNOTSUPP); + + definer = kzalloc(sizeof(*definer), GFP_KERNEL); + if (!definer) + return ERR_PTR(-ENOMEM); + + definer->ns_type = ns_type; + id = root->cmds->create_match_definer(root, format_id, match_mask); + if (id < 0) { + mlx5_core_warn(root->dev, "Failed to create match definer (%d)\n", id); + kfree(definer); + return ERR_PTR(id); + } + definer->id = id; + return definer; +} + +void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, + struct mlx5_flow_definer *definer) +{ + struct mlx5_flow_root_namespace *root; + + root = get_root_namespace(dev, definer->ns_type); + if (WARN_ON(!root)) + return; + + root->cmds->destroy_match_definer(root, definer->id); + kfree(definer); +} + +int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id) +{ + if (peer_ns && ns->mode != peer_ns->mode) { + mlx5_core_err(ns->dev, + "Can't peer namespace of different steering mode\n"); + return -EINVAL; + } + + return ns->cmds->set_peer(ns, peer_ns, peer_vhca_id); +} + +/* This function should be called only at init stage of the namespace. + * It is not safe to call this function while steering operations + * are executed in the namespace. + */ +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode) +{ + struct mlx5_flow_root_namespace *root; + const struct mlx5_flow_cmds *cmds; + int err; + + root = find_root(&ns->node); + if (&root->ns != ns) + /* Can't set cmds to non root namespace */ + return -EINVAL; + + if (root->table_type != FS_FT_FDB) + return -EOPNOTSUPP; + + if (root->mode == mode) + return 0; + + if (mode == MLX5_FLOW_STEERING_MODE_SMFS) + cmds = mlx5_fs_cmd_get_dr_cmds(); + else + cmds = mlx5_fs_cmd_get_fw_cmds(); + if (!cmds) + return -EOPNOTSUPP; + + err = cmds->create_ns(root); + if (err) { + mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n", + err); + return err; + } + + root->cmds->destroy_ns(root); + root->cmds = cmds; + root->mode = mode; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h new file mode 100644 index 0000000000..4aed1768b8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5_FS_CORE_ +#define _MLX5_FS_CORE_ + +#include +#include +#include +#include +#include + +#define FDB_TC_MAX_CHAIN 3 +#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) +#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1) + +/* The index of the last real chain (FT) + 1 as chain zero is valid as well */ +#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1) + +#define FDB_TC_MAX_PRIO 16 +#define FDB_TC_LEVELS_PER_PRIO 2 + +struct mlx5_flow_definer { + enum mlx5_flow_namespace_type ns_type; + u32 id; +}; + +enum mlx5_flow_resource_owner { + MLX5_FLOW_RESOURCE_OWNER_FW, + MLX5_FLOW_RESOURCE_OWNER_SW, +}; + +struct mlx5_modify_hdr { + enum mlx5_flow_namespace_type ns_type; + enum mlx5_flow_resource_owner owner; + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; + +struct mlx5_pkt_reformat { + enum mlx5_flow_namespace_type ns_type; + int reformat_type; /* from mlx5_ifc */ + enum mlx5_flow_resource_owner owner; + union { + struct mlx5_fs_dr_action action; + u32 id; + }; +}; + +/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, + * and those are in parallel to one another when going over them to connect + * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one + * parallel namespace will not automatically connect to the first flow table + * found in any prio in any next namespace, but skip the entire containing + * TYPE_PRIO_CHAINS prio. + * + * This is used to implement tc chains, each chain of prios is a different + * namespace inside a containing TYPE_PRIO_CHAINS prio. + */ + +enum fs_node_type { + FS_TYPE_NAMESPACE, + FS_TYPE_PRIO, + FS_TYPE_PRIO_CHAINS, + FS_TYPE_FLOW_TABLE, + FS_TYPE_FLOW_GROUP, + FS_TYPE_FLOW_ENTRY, + FS_TYPE_FLOW_DEST +}; + +enum fs_flow_table_type { + FS_FT_NIC_RX = 0x0, + FS_FT_NIC_TX = 0x1, + FS_FT_ESW_EGRESS_ACL = 0x2, + FS_FT_ESW_INGRESS_ACL = 0x3, + FS_FT_FDB = 0X4, + FS_FT_SNIFFER_RX = 0X5, + FS_FT_SNIFFER_TX = 0X6, + FS_FT_RDMA_RX = 0X7, + FS_FT_RDMA_TX = 0X8, + FS_FT_PORT_SEL = 0X9, + FS_FT_MAX_TYPE = FS_FT_PORT_SEL, +}; + +enum fs_flow_table_op_mod { + FS_FT_OP_MOD_NORMAL, + FS_FT_OP_MOD_LAG_DEMUX, +}; + +enum fs_fte_status { + FS_FTE_STATUS_EXISTING = 1UL << 0, +}; + +enum mlx5_flow_steering_mode { + MLX5_FLOW_STEERING_MODE_DMFS, + MLX5_FLOW_STEERING_MODE_SMFS +}; + +enum mlx5_flow_steering_capabilty { + MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0, + MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1, + MLX5_FLOW_STEERING_CAP_MATCH_RANGES = 1UL << 2, +}; + +struct mlx5_flow_steering { + struct mlx5_core_dev *dev; + enum mlx5_flow_steering_mode mode; + struct kmem_cache *fgs_cache; + struct kmem_cache *ftes_cache; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_namespace **fdb_sub_ns; + struct mlx5_flow_root_namespace **esw_egress_root_ns; + struct mlx5_flow_root_namespace **esw_ingress_root_ns; + struct mlx5_flow_root_namespace *sniffer_tx_root_ns; + struct mlx5_flow_root_namespace *sniffer_rx_root_ns; + struct mlx5_flow_root_namespace *rdma_rx_root_ns; + struct mlx5_flow_root_namespace *rdma_tx_root_ns; + struct mlx5_flow_root_namespace *egress_root_ns; + struct mlx5_flow_root_namespace *port_sel_root_ns; + int esw_egress_acl_vports; + int esw_ingress_acl_vports; +}; + +struct fs_node { + struct list_head list; + struct list_head children; + enum fs_node_type type; + struct fs_node *parent; + struct fs_node *root; + /* lock the node for writing and traversing */ + struct rw_semaphore lock; + refcount_t refcount; + bool active; + void (*del_hw_func)(struct fs_node *); + void (*del_sw_func)(struct fs_node *); + atomic_t version; +}; + +struct mlx5_flow_rule { + struct fs_node node; + struct mlx5_flow_table *ft; + struct mlx5_flow_destination dest_attr; + /* next_ft should be accessed under chain_lock and only of + * destination type is FWD_NEXT_fT. + */ + struct list_head next_ft; + u32 sw_action; +}; + +struct mlx5_flow_handle { + int num_rules; + struct mlx5_flow_rule *rule[]; +}; + +/* Type of children is mlx5_flow_group */ +struct mlx5_flow_table { + struct fs_node node; + struct mlx5_fs_dr_table fs_dr_table; + u32 id; + u16 vport; + unsigned int max_fte; + unsigned int level; + enum fs_flow_table_type type; + enum fs_flow_table_op_mod op_mod; + struct { + bool active; + unsigned int required_groups; + unsigned int group_size; + unsigned int num_groups; + unsigned int max_fte; + } autogroup; + /* Protect fwd_rules */ + struct mutex lock; + /* FWD rules that point on this flow table */ + struct list_head fwd_rules; + u32 flags; + struct rhltable fgs_hash; + enum mlx5_flow_table_miss_action def_miss_action; + struct mlx5_flow_namespace *ns; +}; + +struct mlx5_ft_underlay_qp { + struct list_head list; + u32 qpn; +}; + +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_e00 +/* Calculate the fte_match_param length and without the reserved length. + * Make sure the reserved field is the last. + */ +#define MLX5_ST_SZ_DW_MATCH_PARAM \ + ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \ + BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \ + MLX5_FLD_SZ_BYTES(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED) +\ + MLX5_BYTE_OFF(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED))) + +/* Type of children is mlx5_flow_rule */ +struct fs_fte { + struct fs_node node; + struct mlx5_fs_dr_rule fs_dr_rule; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; + u32 dests_size; + u32 fwd_dests; + u32 index; + struct mlx5_flow_context flow_context; + struct mlx5_flow_act action; + enum fs_fte_status status; + struct mlx5_fc *counter; + struct rhash_head hash; + int modify_mask; +}; + +/* Type of children is mlx5_flow_table/namespace */ +struct fs_prio { + struct fs_node node; + unsigned int num_levels; + unsigned int start_level; + unsigned int prio; + unsigned int num_ft; +}; + +/* Type of children is fs_prio */ +struct mlx5_flow_namespace { + /* parent == NULL => root ns */ + struct fs_node node; + enum mlx5_flow_table_miss_action def_miss_action; +}; + +struct mlx5_flow_group_mask { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM]; +}; + +/* Type of children is fs_fte */ +struct mlx5_flow_group { + struct fs_node node; + struct mlx5_fs_dr_matcher fs_dr_matcher; + struct mlx5_flow_group_mask mask; + u32 start_index; + u32 max_ftes; + struct ida fte_allocator; + u32 id; + struct rhashtable ftes_hash; + struct rhlist_head hash; +}; + +struct mlx5_flow_root_namespace { + struct mlx5_flow_namespace ns; + enum mlx5_flow_steering_mode mode; + struct mlx5_fs_dr_domain fs_dr_domain; + enum fs_flow_table_type table_type; + struct mlx5_core_dev *dev; + struct mlx5_flow_table *root_ft; + /* Should be held when chaining flow tables */ + struct mutex chain_lock; + struct list_head underlay_qpns; + const struct mlx5_flow_cmds *cmds; +}; + +int mlx5_init_fc_stats(struct mlx5_core_dev *dev); +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay); +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); + +int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id); + +int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, + enum mlx5_flow_steering_mode mode); + +int mlx5_fs_core_alloc(struct mlx5_core_dev *dev); +void mlx5_fs_core_free(struct mlx5_core_dev *dev); +int mlx5_fs_core_init(struct mlx5_core_dev *dev); +void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev); + +int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports); +void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); +int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); +void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); + +u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); + +struct mlx5_flow_root_namespace *find_root(struct fs_node *node); + +#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } + +#define fs_list_for_each_entry(pos, root) \ + list_for_each_entry(pos, root, node.list) + +#define fs_list_for_each_entry_safe(pos, tmp, root) \ + list_for_each_entry_safe(pos, tmp, root, node.list) + +#define fs_for_each_ns_or_ft_reverse(pos, prio) \ + list_for_each_entry_reverse(pos, &(prio)->node.children, list) + +#define fs_for_each_ns_or_ft(pos, prio) \ + list_for_each_entry(pos, (&(prio)->node.children), list) + +#define fs_for_each_prio(pos, ns) \ + fs_list_for_each_entry(pos, &(ns)->node.children) + +#define fs_for_each_ns(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft(pos, prio) \ + fs_list_for_each_entry(pos, &(prio)->node.children) + +#define fs_for_each_ft_safe(pos, tmp, prio) \ + fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children) + +#define fs_for_each_fg(pos, ft) \ + fs_list_for_each_entry(pos, &(ft)->node.children) + +#define fs_for_each_fte(pos, fg) \ + fs_list_for_each_entry(pos, &(fg)->node.children) + +#define fs_for_each_dst(pos, fte) \ + fs_list_for_each_entry(pos, &(fte)->node.children) + +#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \ + (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \ + (type == FS_FT_NIC_TX) ? MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) : \ + (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \ + (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \ + (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ + (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \ + (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \ + (type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) : \ + (type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) : \ + (type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_PORT_SEL != FS_FT_MAX_TYPE))\ + ) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c new file mode 100644 index 0000000000..17fe30a4c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -0,0 +1,772 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" + +#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000) +/* Max number of counters to query in bulk read is 32K */ +#define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +#define MLX5_INIT_COUNTERS_BULK 8 +#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) +#define MLX5_FC_POOL_USED_BUFF_RATIO 10 + +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct list_head list; + struct llist_node addlist; + struct llist_node dellist; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + struct mlx5_fc_bulk *bulk; + u32 id; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); +static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool); +static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc); + +/* locking scheme: + * + * It is the responsibility of the user to prevent concurrent calls or bad + * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference + * to struct mlx5_fc. + * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a + * dump (access to struct mlx5_fc) after a counter is destroyed. + * + * access to counter list: + * - create (user context) + * - mlx5_fc_create() only adds to an addlist to be used by + * mlx5_fc_stats_work(). addlist is a lockless single linked list + * that doesn't require any additional synchronization when adding single + * node. + * - spawn thread to do the actual destroy + * + * - destroy (user context) + * - add a counter to lockless dellist + * - spawn thread to do the actual del + * + * - dump (user context) + * user should not call dump after destroy + * + * - query (single thread workqueue context) + * destroy/dump - no conflict (see destroy) + * query/dump - packets and bytes might be inconsistent (since update is not + * atomic) + * query/create - no conflict (see create) + * since every create/destroy spawn the work, only after necessary time has + * elapsed, the thread will actually query the hardware. + */ + +static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev, + u32 id) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + unsigned long next_id = (unsigned long)id + 1; + struct mlx5_fc *counter; + unsigned long tmp; + + rcu_read_lock(); + /* skip counters that are in idr, but not yet in counters list */ + idr_for_each_entry_continue_ul(&fc_stats->counters_idr, + counter, tmp, next_id) { + if (!list_empty(&counter->list)) + break; + } + rcu_read_unlock(); + + return counter ? &counter->list : &fc_stats->counters; +} + +static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id); + + list_add_tail(&counter->list, next); +} + +static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + list_del(&counter->list); + + spin_lock(&fc_stats->counters_idr_lock); + WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id)); + spin_unlock(&fc_stats->counters_idr_lock); +} + +static int get_init_bulk_query_len(struct mlx5_core_dev *dev) +{ + return min_t(int, MLX5_INIT_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} + +static int get_max_bulk_query_len(struct mlx5_core_dev *dev) +{ + return min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} + +static void update_counter_cache(int index, u32 *bulk_raw_data, + struct mlx5_fc_cache *cache) +{ + void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data, + flow_statistics[index]); + u64 packets = MLX5_GET64(traffic_counter, stats, packets); + u64 bytes = MLX5_GET64(traffic_counter, stats, octets); + + if (cache->packets == packets) + return; + + cache->packets = packets; + cache->bytes = bytes; + cache->lastuse = jiffies; +} + +static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u32 last_id) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + bool query_more_counters = (first->id <= last_id); + int cur_bulk_len = fc_stats->bulk_query_len; + u32 *data = fc_stats->bulk_query_out; + struct mlx5_fc *counter = first; + u32 bulk_base_id; + int bulk_len; + int err; + + while (query_more_counters) { + /* first id must be aligned to 4 when using bulk query */ + bulk_base_id = counter->id & ~0x3; + + /* number of counters to query inc. the last counter */ + bulk_len = min_t(int, cur_bulk_len, + ALIGN(last_id - bulk_base_id + 1, 4)); + + err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, + data); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + return; + } + query_more_counters = false; + + list_for_each_entry_from(counter, &fc_stats->counters, list) { + int counter_index = counter->id - bulk_base_id; + struct mlx5_fc_cache *cache = &counter->cache; + + if (counter->id >= bulk_base_id + bulk_len) { + query_more_counters = true; + break; + } + + update_counter_cache(counter_index, data, cache); + } + } +} + +static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + mlx5_cmd_fc_free(dev, counter->id); + kfree(counter); +} + +static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (counter->bulk) + mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); + else + mlx5_fc_free(dev, counter); +} + +static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int max_bulk_len = get_max_bulk_query_len(dev); + unsigned long now = jiffies; + u32 *bulk_query_out_tmp; + int max_out_len; + + if (fc_stats->bulk_query_alloc_failed && + time_before(now, fc_stats->next_bulk_query_alloc)) + return; + + max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); + bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL); + if (!bulk_query_out_tmp) { + mlx5_core_warn_once(dev, + "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n", + max_bulk_len); + fc_stats->bulk_query_alloc_failed = true; + fc_stats->next_bulk_query_alloc = + now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD; + return; + } + + kfree(fc_stats->bulk_query_out); + fc_stats->bulk_query_out = bulk_query_out_tmp; + fc_stats->bulk_query_len = max_bulk_len; + if (fc_stats->bulk_query_alloc_failed) { + mlx5_core_info(dev, + "Flow counters bulk query buffer size increased, bulk_size(%d)\n", + max_bulk_len); + fc_stats->bulk_query_alloc_failed = false; + } +} + +static void mlx5_fc_stats_work(struct work_struct *work) +{ + struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, + priv.fc_stats.work.work); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + /* Take dellist first to ensure that counters cannot be deleted before + * they are inserted. + */ + struct llist_node *dellist = llist_del_all(&fc_stats->dellist); + struct llist_node *addlist = llist_del_all(&fc_stats->addlist); + struct mlx5_fc *counter = NULL, *last = NULL, *tmp; + unsigned long now = jiffies; + + if (addlist || !list_empty(&fc_stats->counters)) + queue_delayed_work(fc_stats->wq, &fc_stats->work, + fc_stats->sampling_interval); + + llist_for_each_entry(counter, addlist, addlist) { + mlx5_fc_stats_insert(dev, counter); + fc_stats->num_counters++; + } + + llist_for_each_entry_safe(counter, tmp, dellist, dellist) { + mlx5_fc_stats_remove(dev, counter); + + mlx5_fc_release(dev, counter); + fc_stats->num_counters--; + } + + if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) && + fc_stats->num_counters > get_init_bulk_query_len(dev)) + mlx5_fc_stats_bulk_query_size_increase(dev); + + if (time_before(now, fc_stats->next_query) || + list_empty(&fc_stats->counters)) + return; + last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list); + + counter = list_first_entry(&fc_stats->counters, struct mlx5_fc, + list); + if (counter) + mlx5_fc_stats_query_counter_range(dev, counter, last->id); + + fc_stats->next_query = now + fc_stats->sampling_interval; +} + +static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_fc *counter; + int err; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + err = mlx5_cmd_fc_alloc(dev, &counter->id); + if (err) { + kfree(counter); + return ERR_PTR(err); + } + + return counter; +} + +static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + + if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) { + counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool); + if (!IS_ERR(counter)) + return counter; + } + + return mlx5_fc_single_alloc(dev); +} + +struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int err; + + if (IS_ERR(counter)) + return counter; + + INIT_LIST_HEAD(&counter->list); + counter->aging = aging; + + if (aging) { + u32 id = counter->id; + + counter->cache.lastuse = jiffies; + counter->lastbytes = counter->cache.bytes; + counter->lastpackets = counter->cache.packets; + + idr_preload(GFP_KERNEL); + spin_lock(&fc_stats->counters_idr_lock); + + err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id, + GFP_NOWAIT); + + spin_unlock(&fc_stats->counters_idr_lock); + idr_preload_end(); + if (err) + goto err_out_alloc; + + llist_add(&counter->addlist, &fc_stats->addlist); + } + + return counter; + +err_out_alloc: + mlx5_fc_release(dev, counter); + return ERR_PTR(err); +} + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (aging) + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + return counter; +} +EXPORT_SYMBOL(mlx5_fc_create); + +u32 mlx5_fc_id(struct mlx5_fc *counter) +{ + return counter->id; +} +EXPORT_SYMBOL(mlx5_fc_id); + +void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (!counter) + return; + + if (counter->aging) { + llist_add(&counter->dellist, &fc_stats->dellist); + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + return; + } + + mlx5_fc_release(dev, counter); +} +EXPORT_SYMBOL(mlx5_fc_destroy); + +int mlx5_init_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int init_bulk_len; + int init_out_len; + + spin_lock_init(&fc_stats->counters_idr_lock); + idr_init(&fc_stats->counters_idr); + INIT_LIST_HEAD(&fc_stats->counters); + init_llist_head(&fc_stats->addlist); + init_llist_head(&fc_stats->dellist); + + init_bulk_len = get_init_bulk_query_len(dev); + init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len); + fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL); + if (!fc_stats->bulk_query_out) + return -ENOMEM; + fc_stats->bulk_query_len = init_bulk_len; + + fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); + if (!fc_stats->wq) + goto err_wq_create; + + fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; + INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); + + mlx5_fc_pool_init(&fc_stats->fc_pool, dev); + return 0; + +err_wq_create: + kfree(fc_stats->bulk_query_out); + return -ENOMEM; +} + +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct llist_node *tmplist; + struct mlx5_fc *counter; + struct mlx5_fc *tmp; + + cancel_delayed_work_sync(&dev->priv.fc_stats.work); + destroy_workqueue(dev->priv.fc_stats.wq); + dev->priv.fc_stats.wq = NULL; + + tmplist = llist_del_all(&fc_stats->addlist); + llist_for_each_entry_safe(counter, tmp, tmplist, addlist) + mlx5_fc_release(dev, counter); + + list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list) + mlx5_fc_release(dev, counter); + + mlx5_fc_pool_cleanup(&fc_stats->fc_pool); + idr_destroy(&fc_stats->counters_idr); + kfree(fc_stats->bulk_query_out); +} + +int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, + u64 *packets, u64 *bytes) +{ + return mlx5_cmd_fc_query(dev, counter->id, packets, bytes); +} +EXPORT_SYMBOL(mlx5_fc_query); + +u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter) +{ + return counter->cache.lastuse; +} + +void mlx5_fc_query_cached(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse) +{ + struct mlx5_fc_cache c; + + c = counter->cache; + + *bytes = c.bytes - counter->lastbytes; + *packets = c.packets - counter->lastpackets; + *lastuse = c.lastuse; + + counter->lastbytes = c.bytes; + counter->lastpackets = c.packets; +} + +void mlx5_fc_query_cached_raw(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse) +{ + struct mlx5_fc_cache c = counter->cache; + + *bytes = c.bytes; + *packets = c.packets; + *lastuse = c.lastuse; +} + +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + queue_delayed_work(fc_stats->wq, dwork, delay); +} + +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->sampling_interval = min_t(unsigned long, interval, + fc_stats->sampling_interval); +} + +/* Flow counter bluks */ + +struct mlx5_fc_bulk { + struct list_head pool_list; + u32 base_id; + int bulk_len; + unsigned long *bitmask; + struct mlx5_fc fcs[]; +}; + +static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, + u32 id) +{ + counter->bulk = bulk; + counter->id = id; +} + +static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk) +{ + return bitmap_weight(bulk->bitmask, bulk->bulk_len); +} + +static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) +{ + enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask; + struct mlx5_fc_bulk *bulk; + int err = -ENOMEM; + int bulk_len; + u32 base_id; + int i; + + alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); + bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; + + bulk = kvzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL); + if (!bulk) + goto err_alloc_bulk; + + bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); + if (!bulk->bitmask) + goto err_alloc_bitmask; + + err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id); + if (err) + goto err_mlx5_cmd_bulk_alloc; + + bulk->base_id = base_id; + bulk->bulk_len = bulk_len; + for (i = 0; i < bulk_len; i++) { + mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i); + set_bit(i, bulk->bitmask); + } + + return bulk; + +err_mlx5_cmd_bulk_alloc: + kvfree(bulk->bitmask); +err_alloc_bitmask: + kvfree(bulk); +err_alloc_bulk: + return ERR_PTR(err); +} + +static int +mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) +{ + if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) { + mlx5_core_err(dev, "Freeing bulk before all counters were released\n"); + return -EBUSY; + } + + mlx5_cmd_fc_free(dev, bulk->base_id); + kvfree(bulk->bitmask); + kvfree(bulk); + + return 0; +} + +static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) +{ + int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len); + + if (free_fc_index >= bulk->bulk_len) + return ERR_PTR(-ENOSPC); + + clear_bit(free_fc_index, bulk->bitmask); + return &bulk->fcs[free_fc_index]; +} + +static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) +{ + int fc_index = fc->id - bulk->base_id; + + if (test_bit(fc_index, bulk->bitmask)) + return -EINVAL; + + set_bit(fc_index, bulk->bitmask); + return 0; +} + +/* Flow counters pool API */ + +static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev) +{ + fc_pool->dev = dev; + mutex_init(&fc_pool->pool_lock); + INIT_LIST_HEAD(&fc_pool->fully_used); + INIT_LIST_HEAD(&fc_pool->partially_used); + INIT_LIST_HEAD(&fc_pool->unused); + fc_pool->available_fcs = 0; + fc_pool->used_fcs = 0; + fc_pool->threshold = 0; +} + +static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk; + struct mlx5_fc_bulk *tmp; + + list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list) + mlx5_fc_bulk_destroy(dev, bulk); +} + +static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool) +{ + fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, + fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO); +} + +static struct mlx5_fc_bulk * +mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *new_bulk; + + new_bulk = mlx5_fc_bulk_create(dev); + if (!IS_ERR(new_bulk)) + fc_pool->available_fcs += new_bulk->bulk_len; + mlx5_fc_pool_update_threshold(fc_pool); + return new_bulk; +} + +static void +mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + + fc_pool->available_fcs -= bulk->bulk_len; + mlx5_fc_bulk_destroy(dev, bulk); + mlx5_fc_pool_update_threshold(fc_pool); +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_from_list(struct list_head *src_list, + struct list_head *next_list, + bool move_non_full_bulk) +{ + struct mlx5_fc_bulk *bulk; + struct mlx5_fc *fc; + + if (list_empty(src_list)) + return ERR_PTR(-ENODATA); + + bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list); + fc = mlx5_fc_bulk_acquire_fc(bulk); + if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0) + list_move(&bulk->pool_list, next_list); + return fc; +} + +static struct mlx5_fc * +mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool) +{ + struct mlx5_fc_bulk *new_bulk; + struct mlx5_fc *fc; + + mutex_lock(&fc_pool->pool_lock); + + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used, + &fc_pool->fully_used, false); + if (IS_ERR(fc)) + fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused, + &fc_pool->partially_used, + true); + if (IS_ERR(fc)) { + new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool); + if (IS_ERR(new_bulk)) { + fc = ERR_CAST(new_bulk); + goto out; + } + fc = mlx5_fc_bulk_acquire_fc(new_bulk); + list_add(&new_bulk->pool_list, &fc_pool->partially_used); + } + fc_pool->available_fcs--; + fc_pool->used_fcs++; + +out: + mutex_unlock(&fc_pool->pool_lock); + return fc; +} + +static void +mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc) +{ + struct mlx5_core_dev *dev = fc_pool->dev; + struct mlx5_fc_bulk *bulk = fc->bulk; + int bulk_free_fcs_amount; + + mutex_lock(&fc_pool->pool_lock); + + if (mlx5_fc_bulk_release_fc(bulk, fc)) { + mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); + goto unlock; + } + + fc_pool->available_fcs++; + fc_pool->used_fcs--; + + bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk); + if (bulk_free_fcs_amount == 1) + list_move_tail(&bulk->pool_list, &fc_pool->partially_used); + if (bulk_free_fcs_amount == bulk->bulk_len) { + list_del(&bulk->pool_list); + if (fc_pool->available_fcs > fc_pool->threshold) + mlx5_fc_pool_free_bulk(fc_pool, bulk); + else + list_add(&bulk->pool_list, &fc_pool->unused); + } + +unlock: + mutex_unlock(&fc_pool->pool_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c new file mode 100644 index 0000000000..c14590acc7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include "fs_ft_pool.h" + +/* Firmware currently has 4 pool of 4 sizes that it supports (FT_POOLS), + * and a virtual memory region of 16M (MLX5_FT_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via mlx5_ft_pool_get_avail_sz. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small and match firmware + * pools. + */ +#define FT_SIZE (16 * 1024 * 1024) +static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024, + 1 * 1024 * 1024, + 64 * 1024, + 128, + 1 /* size for termination tables */ }; +struct mlx5_ft_pool { + int ft_left[ARRAY_SIZE(FT_POOLS)]; +}; + +int mlx5_ft_pool_init(struct mlx5_core_dev *dev) +{ + struct mlx5_ft_pool *ft_pool; + int i; + + ft_pool = kzalloc(sizeof(*ft_pool), GFP_KERNEL); + if (!ft_pool) + return -ENOMEM; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) + ft_pool->ft_left[i] = FT_SIZE / FT_POOLS[i]; + + dev->priv.ft_pool = ft_pool; + return 0; +} + +void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev) +{ + kfree(dev->priv.ft_pool); +} + +int +mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type, + int desired_size) +{ + u32 max_ft_size = 1 << MLX5_CAP_FLOWTABLE_TYPE(dev, log_max_ft_size, table_type); + int i, found_i = -1; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { + if (dev->priv.ft_pool->ft_left[i] && FT_POOLS[i] >= desired_size && + FT_POOLS[i] <= max_ft_size) { + found_i = i; + if (desired_size != POOL_NEXT_SIZE) + break; + } + } + + if (found_i != -1) { + --dev->priv.ft_pool->ft_left[found_i]; + return FT_POOLS[found_i]; + } + + return 0; +} + +void +mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz) +{ + int i; + + if (!sz) + return; + + for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { + if (sz == FT_POOLS[i]) { + ++dev->priv.ft_pool->ft_left[i]; + return; + } + } + + WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h new file mode 100644 index 0000000000..25f4274b37 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_FS_FT_POOL_H__ +#define __MLX5_FS_FT_POOL_H__ + +#include +#include "fs_core.h" + +#define POOL_NEXT_SIZE 0 + +int mlx5_ft_pool_init(struct mlx5_core_dev *dev); +void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev); + +int +mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type, + int desired_size); +void +mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz); + +#endif /* __MLX5_FS_FT_POOL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c new file mode 100644 index 0000000000..58f4c0d0fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -0,0 +1,845 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_core.h" +#include "../../mlxfw/mlxfw.h" +#include "lib/tout.h" + +enum { + MCQS_IDENTIFIER_BOOT_IMG = 0x1, + MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4, + MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5, + MCQS_IDENTIFIER_CS_TOKEN = 0x6, + MCQS_IDENTIFIER_DBG_TOKEN = 0x7, + MCQS_IDENTIFIER_GEARBOX = 0xA, +}; + +enum { + MCQS_UPDATE_STATE_IDLE, + MCQS_UPDATE_STATE_IN_PROGRESS, + MCQS_UPDATE_STATE_APPLIED, + MCQS_UPDATE_STATE_ACTIVE, + MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET, + MCQS_UPDATE_STATE_FAILED, + MCQS_UPDATE_STATE_CANCELED, + MCQS_UPDATE_STATE_BUSY, +}; + +enum { + MCQI_INFO_TYPE_CAPABILITIES = 0x0, + MCQI_INFO_TYPE_VERSION = 0x1, + MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5, +}; + +enum { + MCQI_FW_RUNNING_VERSION = 0, + MCQI_FW_STORED_VERSION = 1, +}; + +int mlx5_query_board_id(struct mlx5_core_dev *dev) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {}; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); + err = mlx5_cmd_exec_inout(dev, query_adapter, in, out); + if (err) + goto out; + + memcpy(dev->board_id, + MLX5_ADDR_OF(query_adapter_out, out, + query_adapter_struct.vsd_contd_psid), + MLX5_FLD_SZ_BYTES(query_adapter_out, + query_adapter_struct.vsd_contd_psid)); + +out: + kfree(out); + return err; +} + +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {}; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); + err = mlx5_cmd_exec_inout(mdev, query_adapter, in, out); + if (err) + goto out; + + *vendor_id = MLX5_GET(query_adapter_out, out, + query_adapter_struct.ieee_vendor_id); +out: + kfree(out); + return err; +} +EXPORT_SYMBOL(mlx5_core_query_vendor_id); + +static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_pcam_reg(dev, dev->caps.pcam, + MLX5_PCAM_FEATURE_ENHANCED_FEATURES, + MLX5_PCAM_REGS_5000_TO_507F); +} + +static int mlx5_get_mcam_access_reg_group(struct mlx5_core_dev *dev, + enum mlx5_mcam_reg_groups group) +{ + return mlx5_query_mcam_reg(dev, dev->caps.mcam[group], + MLX5_MCAM_FEATURE_ENHANCED_FEATURES, group); +} + +static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_qcam_reg(dev, dev->caps.qcam, + MLX5_QCAM_FEATURE_ENHANCED_FEATURES, + MLX5_QCAM_REGS_FIRST_128); +} + +int mlx5_query_hca_caps(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, port_selection_cap)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_PORT_SELECTION, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, hca_cap_2)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_GENERAL_2, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ODP, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, atomic)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ATOMIC, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, roce)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ROCE, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_FLOW_TABLE, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_ESWITCH_MANAGER(dev)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ESWITCH, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, qos)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_QOS, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, debug)) + mlx5_core_get_caps_mode(dev, MLX5_CAP_DEBUG, HCA_CAP_OPMOD_GET_CUR); + + if (MLX5_CAP_GEN(dev, pcam_reg)) + mlx5_get_pcam_reg(dev); + + if (MLX5_CAP_GEN(dev, mcam_reg)) { + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F); + } + + if (MLX5_CAP_GEN(dev, qcam_reg)) + mlx5_get_qcam_reg(dev); + + if (MLX5_CAP_GEN(dev, device_memory)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_DEV_MEM, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, event_cap)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_DEV_EVENT, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_TLS, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_VDPA_EMULATION, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, ipsec_offload)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_IPSEC, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, crypto)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_CRYPTO, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_MACSEC, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, adv_virtualization)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ADV_VIRTUALIZATION, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + return 0; +} + +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) +{ + u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {}; + int i; + + MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); + + if (MLX5_CAP_GEN(dev, sw_owner_id)) { + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i, + sw_owner_id[i]); + } + + if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && + dev->priv.sw_vhca_id > 0) + MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id); + + return mlx5_cmd_exec_in(dev, init_hca, in); +} + +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {}; + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + return mlx5_cmd_exec_in(dev, teardown_hca, in); +} + +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; + int force_state; + int ret; + + if (!MLX5_CAP_GEN(dev, force_teardown)) { + mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE); + + ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + force_state = MLX5_GET(teardown_hca_out, out, state); + if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { + mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n"); + return -EIO; + } + + return 0; +} + +int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = mlx5_tout_ms(dev, TEARDOWN); + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {}; + int state; + int ret; + + if (!MLX5_CAP_GEN(dev, fast_teardown)) { + mlx5_core_dbg(dev, "fast teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + MLX5_SET(teardown_hca_in, in, profile, + MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN); + + ret = mlx5_cmd_exec_inout(dev, teardown_hca, in, out); + if (ret) + return ret; + + state = MLX5_GET(teardown_hca_out, out, state); + if (state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { + mlx5_core_warn(dev, "teardown with fast mode failed\n"); + return -EIO; + } + + mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED); + + /* Loop until device state turns to disable */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + cond_resched(); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + return -EIO; + } + + return 0; +} + +enum mlxsw_reg_mcc_instruction { + MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01, + MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02, + MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03, + MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04, + MLX5_REG_MCC_INSTRUCTION_ACTIVATE = 0x06, + MLX5_REG_MCC_INSTRUCTION_CANCEL = 0x08, +}; + +static int mlx5_reg_mcc_set(struct mlx5_core_dev *dev, + enum mlxsw_reg_mcc_instruction instr, + u16 component_index, u32 update_handle, + u32 component_size) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(mcc_reg, in, instruction, instr); + MLX5_SET(mcc_reg, in, component_index, component_index); + MLX5_SET(mcc_reg, in, update_handle, update_handle); + MLX5_SET(mcc_reg, in, component_size, component_size); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 1); +} + +static int mlx5_reg_mcc_query(struct mlx5_core_dev *dev, + u32 *update_handle, u8 *error_code, + u8 *control_state) +{ + u32 out[MLX5_ST_SZ_DW(mcc_reg)]; + u32 in[MLX5_ST_SZ_DW(mcc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + MLX5_SET(mcc_reg, in, update_handle, *update_handle); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCC, 0, 0); + if (err) + goto out; + + *update_handle = MLX5_GET(mcc_reg, out, update_handle); + *error_code = MLX5_GET(mcc_reg, out, error_code); + *control_state = MLX5_GET(mcc_reg, out, control_state); + +out: + return err; +} + +static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, + u32 update_handle, + u32 offset, u16 size, + u8 *data) +{ + int err, in_size = MLX5_ST_SZ_BYTES(mcda_reg) + size; + u32 out[MLX5_ST_SZ_DW(mcda_reg)]; + int i, j, dw_size = size >> 2; + __be32 data_element; + u32 *in; + + in = kzalloc(in_size, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(mcda_reg, in, update_handle, update_handle); + MLX5_SET(mcda_reg, in, offset, offset); + MLX5_SET(mcda_reg, in, size, size); + + for (i = 0; i < dw_size; i++) { + j = i * 4; + data_element = htonl(*(u32 *)&data[j]); + memcpy(MLX5_ADDR_OF(mcda_reg, in, data) + j, &data_element, 4); + } + + err = mlx5_core_access_reg(dev, in, in_size, out, + sizeof(out), MLX5_REG_MCDA, 0, 1); + kfree(in); + return err; +} + +static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, + u16 component_index, bool read_pending, + u8 info_type, u16 data_size, void *mcqi_data) +{ + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {}; + u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {}; + void *data; + int err; + + MLX5_SET(mcqi_reg, in, component_index, component_index); + MLX5_SET(mcqi_reg, in, read_pending_component, read_pending); + MLX5_SET(mcqi_reg, in, info_type, info_type); + MLX5_SET(mcqi_reg, in, data_size, data_size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + MLX5_ST_SZ_BYTES(mcqi_reg) + data_size, + MLX5_REG_MCQI, 0, 0); + if (err) + return err; + + data = MLX5_ADDR_OF(mcqi_reg, out, data); + memcpy(mcqi_data, data, data_size); + + return 0; +} + +static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index, + u32 *max_component_size, u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {}; + int err; + + err = mlx5_reg_mcqi_query(dev, component_index, 0, + MCQI_INFO_TYPE_CAPABILITIES, + MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg); + if (err) + return err; + + *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size); + + return 0; +} + +struct mlx5_mlxfw_dev { + struct mlxfw_dev mlxfw_dev; + struct mlx5_core_dev *mlx5_core_dev; +}; + +static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, + u16 component_index, u32 *p_max_size, + u8 *p_align_bits, u16 *p_max_write_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) { + mlx5_core_warn(dev, "caps query isn't supported by running FW\n"); + return -EOPNOTSUPP; + } + + return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); +} + +static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + *fwhandle = 0; + err = mlx5_reg_mcc_query(dev, fwhandle, &error_code, &control_state); + if (err) + return err; + + if (control_state != MLXFW_FSM_STATE_IDLE) + return -EBUSY; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE, + 0, *fwhandle, 0); +} + +static int mlx5_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index, u32 component_size) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT, + component_index, fwhandle, component_size); +} + +static int mlx5_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u8 *data, u16 size, u32 offset) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcda_set(dev, fwhandle, offset, size, data); +} + +static int mlx5_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + u16 component_index) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT, + component_index, fwhandle, 0); +} + +static int mlx5_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_ACTIVATE, 0, + fwhandle, 0); +} + +static int mlx5_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, + enum mlxfw_fsm_state *fsm_state, + enum mlxfw_fsm_state_err *fsm_state_err) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u8 control_state, error_code; + int err; + + err = mlx5_reg_mcc_query(dev, &fwhandle, &error_code, &control_state); + if (err) + return err; + + *fsm_state = control_state; + *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code, + MLXFW_FSM_STATE_ERR_MAX); + return 0; +} + +static void mlx5_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0); +} + +static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + + mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0, + fwhandle, 0); +} + +static int mlx5_fsm_reactivate(struct mlxfw_dev *mlxfw_dev, u8 *status) +{ + struct mlx5_mlxfw_dev *mlx5_mlxfw_dev = + container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); + struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; + u32 out[MLX5_ST_SZ_DW(mirc_reg)]; + u32 in[MLX5_ST_SZ_DW(mirc_reg)]; + unsigned long exp_time; + int err; + + exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FSM_REACTIVATE)); + + if (!MLX5_CAP_MCAM_REG2(dev, mirc)) + return -EOPNOTSUPP; + + memset(in, 0, sizeof(in)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MIRC, 0, 1); + if (err) + return err; + + do { + memset(out, 0, sizeof(out)); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MIRC, 0, 0); + if (err) + return err; + + *status = MLX5_GET(mirc_reg, out, status_code); + if (*status != MLXFW_FSM_REACTIVATE_STATUS_BUSY) + return 0; + + msleep(20); + } while (time_before(jiffies, exp_time)); + + return 0; +} + +static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { + .component_query = mlx5_component_query, + .fsm_lock = mlx5_fsm_lock, + .fsm_component_update = mlx5_fsm_component_update, + .fsm_block_download = mlx5_fsm_block_download, + .fsm_component_verify = mlx5_fsm_component_verify, + .fsm_activate = mlx5_fsm_activate, + .fsm_reactivate = mlx5_fsm_reactivate, + .fsm_query_state = mlx5_fsm_query_state, + .fsm_cancel = mlx5_fsm_cancel, + .fsm_release = mlx5_fsm_release +}; + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, + const struct firmware *firmware, + struct netlink_ext_ack *extack) +{ + struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { + .mlxfw_dev = { + .ops = &mlx5_mlxfw_dev_ops, + .psid = dev->board_id, + .psid_size = strlen(dev->board_id), + .devlink = priv_to_devlink(dev), + }, + .mlx5_core_dev = dev + }; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || + !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcc) || + !MLX5_CAP_MCAM_REG(dev, mcda)) { + pr_info("%s flashing isn't supported by the running FW\n", __func__); + return -EOPNOTSUPP; + } + + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, + firmware, extack); +} + +static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev, + u16 component_index, bool read_pending, + u32 *mcqi_version_out) +{ + return mlx5_reg_mcqi_query(dev, component_index, read_pending, + MCQI_INFO_TYPE_VERSION, + MLX5_ST_SZ_BYTES(mcqi_version), + mcqi_version_out); +} + +static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out, + u16 component_index) +{ + u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg); + u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + int err; + + memset(out, 0, out_sz); + + MLX5_SET(mcqs_reg, in, component_index, component_index); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + out_sz, MLX5_REG_MCQS, 0, 0); + return err; +} + +/* scans component index sequentially, to find the boot img index */ +static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + u16 identifier, component_idx = 0; + bool quit; + int err; + + do { + err = mlx5_reg_mcqs_query(dev, out, component_idx); + if (err) + return err; + + identifier = MLX5_GET(mcqs_reg, out, identifier); + quit = !!MLX5_GET(mcqs_reg, out, last_index_flag); + quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG; + } while (!quit && ++component_idx); + + if (identifier != MCQS_IDENTIFIER_BOOT_IMG) { + mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n", + component_idx); + return -EOPNOTSUPP; + } + + return component_idx; +} + +static int +mlx5_fw_image_pending(struct mlx5_core_dev *dev, + int component_index, + bool *pending_version_exists) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)]; + u8 component_update_state; + int err; + + err = mlx5_reg_mcqs_query(dev, out, component_index); + if (err) + return err; + + component_update_state = MLX5_GET(mcqs_reg, out, component_update_state); + + if (component_update_state == MCQS_UPDATE_STATE_IDLE) { + *pending_version_exists = false; + } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) { + *pending_version_exists = true; + } else { + mlx5_core_warn(dev, + "mcqs: can't read pending fw version while fw state is %d\n", + component_update_state); + return -ENODATA; + } + return 0; +} + +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) +{ + u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; + bool pending_version_exists; + int component_index; + int err; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcqs)) { + mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); + return -EOPNOTSUPP; + } + + component_index = mlx5_get_boot_img_component_index(dev); + if (component_index < 0) + return component_index; + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_RUNNING_VERSION, + reg_mcqi_version); + if (err) + return err; + + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); + if (err) + return err; + + if (!pending_version_exists) { + *pending_ver = 0; + return 0; + } + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_STORED_VERSION, + reg_mcqi_version); + if (err) + return err; + + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c new file mode 100644 index 0000000000..c4e19d627d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -0,0 +1,755 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include + +#include "fw_reset.h" +#include "diag/fw_tracer.h" +#include "lib/tout.h" + +enum { + MLX5_FW_RESET_FLAGS_RESET_REQUESTED, + MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, + MLX5_FW_RESET_FLAGS_PENDING_COMP, + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED +}; + +struct mlx5_fw_reset { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct workqueue_struct *wq; + struct work_struct fw_live_patch_work; + struct work_struct reset_request_work; + struct work_struct reset_unload_work; + struct work_struct reset_reload_work; + struct work_struct reset_now_work; + struct work_struct reset_abort_work; + unsigned long reset_flags; + struct timer_list timer; + struct completion done; + int ret; +}; + +enum { + MLX5_FW_RST_STATE_IDLE = 0, + MLX5_FW_RST_STATE_TOGGLE_REQ = 4, +}; + +enum { + MLX5_RST_STATE_BIT_NUM = 12, + MLX5_RST_ACK_BIT_NUM = 22, +}; + +static u8 mlx5_get_fw_rst_state(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_RST_STATE_BIT_NUM) & 0xF; +} + +static void mlx5_set_fw_rst_ack(struct mlx5_core_dev *dev) +{ + iowrite32be(BIT(MLX5_RST_ACK_BIT_NUM), &dev->iseg->initializing); +} + +static int mlx5_fw_reset_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_fw_reset *fw_reset; + + fw_reset = dev->priv.fw_reset; + + if (ctx->val.vbool) + clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); + else + set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); + return 0; +} + +static int mlx5_fw_reset_enable_remote_dev_reset_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_fw_reset *fw_reset; + + fw_reset = dev->priv.fw_reset; + + ctx->val.vbool = !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, + &fw_reset->reset_flags); + return 0; +} + +static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, + u8 reset_type_sel, u8 sync_resp, bool sync_start) +{ + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + + MLX5_SET(mfrl_reg, in, reset_level, reset_level); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_resp, sync_resp); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, sync_start); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1); +} + +static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, + u8 *reset_type, u8 *reset_state) +{ + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 0); + if (err) + return err; + + if (reset_level) + *reset_level = MLX5_GET(mfrl_reg, out, reset_level); + if (reset_type) + *reset_type = MLX5_GET(mfrl_reg, out, reset_type); + if (reset_state) + *reset_state = MLX5_GET(mfrl_reg, out, reset_state); + + return 0; +} + +int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) +{ + return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL); +} + +static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack) +{ + u8 reset_state; + + if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state)) + goto out; + + if (!reset_state) + return 0; + + switch (reset_state) { + case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION: + case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS: + NL_SET_ERR_MSG_MOD(extack, "Sync reset still in progress"); + return -EBUSY; + case MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT: + NL_SET_ERR_MSG_MOD(extack, "Sync reset negotiation timeout"); + return -ETIMEDOUT; + case MLX5_MFRL_REG_RESET_STATE_NACK: + NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset"); + return -EPERM; + case MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT: + NL_SET_ERR_MSG_MOD(extack, "Sync reset unload timeout"); + return -ETIMEDOUT; + } + +out: + NL_SET_ERR_MSG_MOD(extack, "Sync reset failed"); + return -EIO; +} + +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; + int err, rst_res; + + set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + + MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3); + MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel); + MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1); + err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MFRL, 0, 1, false); + if (!err) + return 0; + + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) { + rst_res = mlx5_fw_reset_get_reset_state_err(dev, extack); + return rst_res ? rst_res : err; + } + + NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed"); + return mlx5_cmd_check(dev, err, in, out); +} + +int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack) +{ + u8 rst_state; + int err; + + err = mlx5_fw_reset_get_reset_state_err(dev, extack); + if (err) + return err; + + rst_state = mlx5_get_fw_rst_state(dev); + if (!rst_state) + return 0; + + mlx5_core_err(dev, "Sync reset did not complete, state=%d\n", rst_state); + NL_SET_ERR_MSG_MOD(extack, "Sync reset did not complete successfully"); + return rst_state; +} + +int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); +} + +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + /* if this is the driver that initiated the fw reset, devlink completed the reload */ + if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { + complete(&fw_reset->done); + } else { + if (!unloaded) + mlx5_unload_one(dev, false); + if (mlx5_health_wait_pci_up(dev)) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); + else + mlx5_load_one(dev, true); + devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + } +} + +static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + del_timer_sync(&fw_reset->timer); +} + +static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already cleared\n"); + return -EALREADY; + } + + mlx5_stop_sync_reset_poll(dev); + if (poll_health) + mlx5_start_health_poll(dev); + return 0; +} + +static void mlx5_sync_reset_reload_work(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_reload_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + mlx5_sync_reset_clear_reset_requested(dev, false); + mlx5_enter_error_state(dev, true); + mlx5_fw_reset_complete_reload(dev, false); +} + +#define MLX5_RESET_POLL_INTERVAL (HZ / 10) +static void poll_sync_reset(struct timer_list *t) +{ + struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer); + struct mlx5_core_dev *dev = fw_reset->dev; + u32 fatal_error; + + if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + return; + + fatal_error = mlx5_health_check_fatal_sensors(dev); + + if (fatal_error) { + mlx5_core_warn(dev, "Got Device Reset\n"); + if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags)) + queue_work(fw_reset->wq, &fw_reset->reset_reload_work); + else + mlx5_core_err(dev, "Device is being removed, Drop new reset work\n"); + return; + } + + mod_timer(&fw_reset->timer, round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL)); +} + +static void mlx5_start_sync_reset_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + timer_setup(&fw_reset->timer, poll_sync_reset, 0); + fw_reset->timer.expires = round_jiffies(jiffies + MLX5_RESET_POLL_INTERVAL); + add_timer(&fw_reset->timer); +} + +static int mlx5_fw_reset_set_reset_sync_ack(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 1, false); +} + +static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev) +{ + return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false); +} + +static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already set\n"); + return -EALREADY; + } + mlx5_stop_health_poll(dev, true); + mlx5_start_sync_reset_poll(dev); + return 0; +} + +static void mlx5_fw_live_patch_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + fw_live_patch_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + if (mlx5_fw_tracer_reload(dev->tracer)) + mlx5_core_err(dev, "Failed to reload FW tracer\n"); +} + +#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) +static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev *dev) +{ + struct pci_dev *bridge = dev->pdev->bus->self; + u16 reg16; + int err; + + if (!bridge) + return -EOPNOTSUPP; + + err = pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, ®16); + if (err) + return err; + + if ((reg16 & PCI_EXP_SLTCTL_HPIE) && (reg16 & PCI_EXP_SLTCTL_DLLSCE)) { + mlx5_core_warn(dev, "FW reset is not supported as HotPlug is enabled\n"); + return -EOPNOTSUPP; + } + + return 0; +} +#endif + +static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) +{ + struct pci_bus *bridge_bus = dev->pdev->bus; + struct pci_dev *sdev; + u16 sdev_id; + int err; + + /* Check that all functions under the pci bridge are PFs of + * this device otherwise fail this function. + */ + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); + if (err) + return pcibios_err_to_errno(err); + if (sdev_id != dev_id) { + mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id); + return -EPERM; + } + } + return 0; +} + +static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev) +{ + u16 dev_id; + int err; + + if (!MLX5_CAP_GEN(dev, fast_teardown)) { + mlx5_core_warn(dev, "fast teardown is not supported by firmware\n"); + return false; + } + +#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) + err = mlx5_check_hotplug_interrupt(dev); + if (err) + return false; +#endif + + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return false; + return (!mlx5_check_dev_ids(dev, dev_id)); +} + +static void mlx5_sync_reset_request_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_request_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) || + !mlx5_is_reset_now_capable(dev)) { + err = mlx5_fw_reset_set_reset_sync_nack(dev); + mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", + err ? "Failed" : "Sent"); + return; + } + if (mlx5_sync_reset_set_reset_requested(dev)) + return; + + err = mlx5_fw_reset_set_reset_sync_ack(dev); + if (err) + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); + else + mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); +} + +static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) +{ + struct pci_bus *bridge_bus = dev->pdev->bus; + struct pci_dev *bridge = bridge_bus->self; + unsigned long timeout; + struct pci_dev *sdev; + u16 reg16, dev_id; + int cap, err; + + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return pcibios_err_to_errno(err); + err = mlx5_check_dev_ids(dev, dev_id); + if (err) + return err; + cap = pci_find_capability(bridge, PCI_CAP_ID_EXP); + if (!cap) + return -EOPNOTSUPP; + + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + pci_save_state(sdev); + pci_cfg_access_lock(sdev); + } + /* PCI link toggle */ + err = pcie_capability_set_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD); + if (err) + return pcibios_err_to_errno(err); + msleep(500); + err = pcie_capability_clear_word(bridge, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD); + if (err) + return pcibios_err_to_errno(err); + + /* Check link */ + if (!bridge->link_active_reporting) { + mlx5_core_warn(dev, "No PCI link reporting capability\n"); + msleep(1000); + goto restore; + } + + timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_TOGGLE)); + do { + err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, ®16); + if (err) + return pcibios_err_to_errno(err); + if (reg16 & PCI_EXP_LNKSTA_DLLLA) + break; + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (reg16 & PCI_EXP_LNKSTA_DLLLA) { + mlx5_core_info(dev, "PCI Link up\n"); + } else { + mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n", + reg16, mlx5_tout_ms(dev, PCI_TOGGLE)); + err = -ETIMEDOUT; + goto restore; + } + + do { + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, ®16); + if (err) + return pcibios_err_to_errno(err); + if (reg16 == dev_id) + break; + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (reg16 == dev_id) { + mlx5_core_info(dev, "Firmware responds to PCI config cycles again\n"); + } else { + mlx5_core_err(dev, "Firmware is not responsive (0x%04x) after %llu ms\n", + reg16, mlx5_tout_ms(dev, PCI_TOGGLE)); + err = -ETIMEDOUT; + } + +restore: + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + pci_cfg_access_unlock(sdev); + pci_restore_state(sdev); + } + + return err; +} + +static void mlx5_sync_reset_now_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_now_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) { + mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); + goto done; + } + + err = mlx5_pci_link_toggle(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); + } + + mlx5_enter_error_state(dev, true); +done: + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev, false); +} + +static void mlx5_sync_reset_unload_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset; + struct mlx5_core_dev *dev; + unsigned long timeout; + bool reset_action; + u8 rst_state; + int err; + + fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); + dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) + mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); + else + mlx5_enter_error_state(dev, true); + + if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) + mlx5_unload_one_devl_locked(dev, false); + else + mlx5_unload_one(dev, false); + + mlx5_set_fw_rst_ack(dev); + mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); + + reset_action = false; + timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, RESET_UNLOAD)); + do { + rst_state = mlx5_get_fw_rst_state(dev); + if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ || + rst_state == MLX5_FW_RST_STATE_IDLE) { + reset_action = true; + break; + } + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (!reset_action) { + mlx5_core_err(dev, "Got timeout waiting for sync reset action, state = %u\n", + rst_state); + fw_reset->ret = -ETIMEDOUT; + goto done; + } + + mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state); + if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { + err = mlx5_pci_link_toggle(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err); + fw_reset->ret = err; + } + } + +done: + mlx5_fw_reset_complete_reload(dev, true); +} + +static void mlx5_sync_reset_abort_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_abort_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, true)) + return; + mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); +} + +static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe; + u8 sync_event_rst_type; + + sync_fw_update_eqe = &eqe->data.sync_fw_update; + sync_event_rst_type = sync_fw_update_eqe->sync_rst_state & SYNC_RST_STATE_MASK; + switch (sync_event_rst_type) { + case MLX5_SYNC_RST_STATE_RESET_REQUEST: + queue_work(fw_reset->wq, &fw_reset->reset_request_work); + break; + case MLX5_SYNC_RST_STATE_RESET_UNLOAD: + queue_work(fw_reset->wq, &fw_reset->reset_unload_work); + break; + case MLX5_SYNC_RST_STATE_RESET_NOW: + queue_work(fw_reset->wq, &fw_reset->reset_now_work); + break; + case MLX5_SYNC_RST_STATE_RESET_ABORT: + queue_work(fw_reset->wq, &fw_reset->reset_abort_work); + break; + } +} + +static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb); + struct mlx5_eqe *eqe = data; + + if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags)) + return NOTIFY_DONE; + + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT: + queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work); + break; + case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT: + mlx5_sync_reset_events_handle(fw_reset, eqe); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) +{ + unsigned long pci_sync_update_timeout = mlx5_tout_ms(dev, PCI_SYNC_UPDATE); + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + unsigned long timeout; + int err; + + if (MLX5_CAP_GEN(dev, pci_sync_for_fw_update_with_driver_unload)) + pci_sync_update_timeout += mlx5_tout_ms(dev, RESET_UNLOAD); + timeout = msecs_to_jiffies(pci_sync_update_timeout); + if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { + mlx5_core_warn(dev, "FW sync reset timeout after %lu seconds\n", + pci_sync_update_timeout / 1000); + err = -ETIMEDOUT; + goto out; + } + err = fw_reset->ret; + if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { + mlx5_unload_one_devl_locked(dev, false); + mlx5_load_one_devl_locked(dev, true); + } +out: + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); + return err; +} + +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); + mlx5_eq_notifier_register(dev, &fw_reset->nb); +} + +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) +{ + mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); +} + +void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); + cancel_work_sync(&fw_reset->fw_live_patch_work); + cancel_work_sync(&fw_reset->reset_request_work); + cancel_work_sync(&fw_reset->reset_unload_work); + cancel_work_sync(&fw_reset->reset_reload_work); + cancel_work_sync(&fw_reset->reset_now_work); + cancel_work_sync(&fw_reset->reset_abort_work); +} + +static const struct devlink_param mlx5_fw_reset_devlink_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_fw_reset_enable_remote_dev_reset_get, + mlx5_fw_reset_enable_remote_dev_reset_set, NULL), +}; + +int mlx5_fw_reset_init(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + int err; + + if (!fw_reset) + return -ENOMEM; + fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); + if (!fw_reset->wq) { + kfree(fw_reset); + return -ENOMEM; + } + + fw_reset->dev = dev; + dev->priv.fw_reset = fw_reset; + + err = devl_params_register(priv_to_devlink(dev), + mlx5_fw_reset_devlink_params, + ARRAY_SIZE(mlx5_fw_reset_devlink_params)); + if (err) { + destroy_workqueue(fw_reset->wq); + kfree(fw_reset); + return err; + } + + INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event); + INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); + INIT_WORK(&fw_reset->reset_unload_work, mlx5_sync_reset_unload_event); + INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); + INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event); + INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event); + + init_completion(&fw_reset->done); + return 0; +} + +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + devl_params_unregister(priv_to_devlink(dev), + mlx5_fw_reset_devlink_params, + ARRAY_SIZE(mlx5_fw_reset_devlink_params)); + destroy_workqueue(fw_reset->wq); + kfree(dev->priv.fw_reset); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h new file mode 100644 index 0000000000..ea527d06a8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_FW_RESET_H +#define __MLX5_FW_RESET_H + +#include "mlx5_core.h" + +int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); +int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, + struct netlink_ext_ack *extack); +int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); + +int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, + struct netlink_ext_ack *extack); +void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); +void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev); +void mlx5_drain_fw_reset(struct mlx5_core_dev *dev); +int mlx5_fw_reset_init(struct mlx5_core_dev *dev); +void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c new file mode 100644 index 0000000000..2fb2598b77 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -0,0 +1,928 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" +#include "lib/events.h" +#include "lib/pci_vsc.h" +#include "lib/tout.h" +#include "diag/fw_tracer.h" +#include "diag/reporter_vnic.h" + +enum { + MAX_MISSES = 3, +}; + +enum { + MLX5_DROP_HEALTH_WORK, +}; + +enum { + MLX5_SENSOR_NO_ERR = 0, + MLX5_SENSOR_PCI_COMM_ERR = 1, + MLX5_SENSOR_PCI_ERR = 2, + MLX5_SENSOR_NIC_DISABLED = 3, + MLX5_SENSOR_NIC_SW_RESET = 4, + MLX5_SENSOR_FW_SYND_RFR = 5, +}; + +enum { + MLX5_SEVERITY_MASK = 0x7, + MLX5_SEVERITY_VALID_MASK = 0x8, +}; + +u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; +} + +void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) +{ + u32 cur_cmdq_addr_l_sz; + + cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz); + iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) | + state << MLX5_NIC_IFC_OFFSET, + &dev->iseg->cmdq_addr_l_sz); +} + +static bool sensor_pci_not_working(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + + /* Offline PCI reads return 0xffffffff */ + return (ioread32be(&h->fw_ver) == 0xffffffff); +} + +static int mlx5_health_get_rfr(u8 rfr_severity) +{ + return rfr_severity >> MLX5_RFR_BIT_OFFSET; +} + +static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd = ioread8(&h->synd); + u8 rfr; + + rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity)); + + if (rfr && synd) + mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); + return rfr && synd; +} + +u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev) +{ + if (sensor_pci_not_working(dev)) + return MLX5_SENSOR_PCI_COMM_ERR; + if (pci_channel_offline(dev->pdev)) + return MLX5_SENSOR_PCI_ERR; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + return MLX5_SENSOR_NIC_DISABLED; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET) + return MLX5_SENSOR_NIC_SW_RESET; + if (sensor_fw_synd_rfr(dev)) + return MLX5_SENSOR_FW_SYND_RFR; + + return MLX5_SENSOR_NO_ERR; +} + +static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock) +{ + enum mlx5_vsc_state state; + int ret; + + if (!mlx5_core_is_pf(dev)) + return -EBUSY; + + /* Try to lock GW access, this stage doesn't return + * EBUSY because locked GW does not mean that other PF + * already started the reset. + */ + ret = mlx5_vsc_gw_lock(dev); + if (ret == -EBUSY) + return -EINVAL; + if (ret) + return ret; + + state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK; + /* At this stage, if the return status == EBUSY, then we know + * for sure that another PF started the reset, so don't allow + * another reset. + */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state); + if (ret) + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + + /* Unlock GW access */ + mlx5_vsc_gw_unlock(dev); + + return ret; +} + +static bool reset_fw_if_needed(struct mlx5_core_dev *dev) +{ + bool supported = (ioread32be(&dev->iseg->initializing) >> + MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; + u32 fatal_error; + + if (!supported) + return false; + + /* The reset only needs to be issued by one PF. The health buffer is + * shared between all functions, and will be cleared during a reset. + * Check again to avoid a redundant 2nd reset. If the fatal errors was + * PCI related a reset won't help. + */ + fatal_error = mlx5_health_check_fatal_sensors(dev); + if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || + fatal_error == MLX5_SENSOR_NIC_DISABLED || + fatal_error == MLX5_SENSOR_NIC_SW_RESET) { + mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help."); + return false; + } + + mlx5_core_warn(dev, "Issuing FW Reset\n"); + /* Write the NIC interface field to initiate the reset, the command + * interface address also resides here, don't overwrite it. + */ + mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET); + + return true; +} + +static void enter_error_state(struct mlx5_core_dev *dev, bool force) +{ + if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mlx5_cmd_flush(dev); + } + + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +} + +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) +{ + bool err_detected = false; + + /* Mark the device as fatal in order to abort FW commands */ + if ((mlx5_health_check_fatal_sensors(dev) || force) && + dev->state == MLX5_DEVICE_STATE_UP) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + err_detected = true; + } + mutex_lock(&dev->intf_state_mutex); + if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock;/* a previous error is still being handled */ + + enter_error_state(dev, force); +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +void mlx5_error_sw_reset(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE); + int lock = -EBUSY; + + mutex_lock(&dev->intf_state_mutex); + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock; + + mlx5_core_err(dev, "start\n"); + + if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + /* Get cr-dump and reset FW semaphore */ + lock = lock_sem_sw_reset(dev, true); + + if (lock == -EBUSY) { + delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP); + goto recover_from_sw_reset; + } + /* Execute SW reset */ + reset_fw_if_needed(dev); + } + +recover_from_sw_reset: + /* Recover from SW reset */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + msleep(20); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + } + + /* Release FW semaphore if you are the lock owner */ + if (!lock) + lock_sem_sw_reset(dev, false); + + mlx5_core_err(dev, "end\n"); + +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) +{ + u8 nic_interface = mlx5_get_nic_state(dev); + + switch (nic_interface) { + case MLX5_NIC_IFC_FULL: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n"); + break; + + case MLX5_NIC_IFC_DISABLED: + mlx5_core_warn(dev, "starting teardown\n"); + break; + + case MLX5_NIC_IFC_NO_DRAM_NIC: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); + break; + + case MLX5_NIC_IFC_SW_RESET: + /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: + * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded + * and this is a VF), this is not recoverable by SW reset. + * Logging of this is handled elsewhere. + * 2. FW reset has been issued by another function, driver can + * be reloaded to recover after the mode switches to + * MLX5_NIC_IFC_DISABLED. + */ + if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) + mlx5_core_warn(dev, "NIC SW reset in progress\n"); + break; + + default: + mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", + nic_interface); + } + + mlx5_disable_device(dev); +} + +int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) +{ + unsigned long end; + + end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET)); + while (sensor_pci_not_working(dev)) { + if (time_after(jiffies, end)) + return -ETIMEDOUT; + if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { + mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n"); + return -ENODEV; + } + msleep(100); + } + return 0; +} + +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) +{ + mlx5_core_warn(dev, "handling bad device here\n"); + mlx5_handle_bad_state(dev); + if (mlx5_health_wait_pci_up(dev)) { + mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n"); + return -EIO; + } + mlx5_core_err(dev, "starting health recovery flow\n"); + if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) { + mlx5_core_err(dev, "health recovery failed\n"); + return -EIO; + } + + mlx5_core_info(dev, "health recovery succeeded\n"); + return 0; +} + +static const char *hsynd_str(u8 synd) +{ + switch (synd) { + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR: + return "firmware internal error"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC: + return "irisc not responding"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR: + return "unrecoverable hardware error"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR: + return "firmware CRC error"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR: + return "ICM fetch PCI error"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR: + return "HW fatal error\n"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN: + return "async EQ buffer overrun"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR: + return "EQ error"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV: + return "Invalid EQ referenced"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR: + return "FFSER error"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR: + return "High temperature"; + default: + return "unrecognized error"; + } +} + +static const char *mlx5_loglevel_str(int level) +{ + switch (level) { + case LOGLEVEL_EMERG: + return "EMERGENCY"; + case LOGLEVEL_ALERT: + return "ALERT"; + case LOGLEVEL_CRIT: + return "CRITICAL"; + case LOGLEVEL_ERR: + return "ERROR"; + case LOGLEVEL_WARNING: + return "WARNING"; + case LOGLEVEL_NOTICE: + return "NOTICE"; + case LOGLEVEL_INFO: + return "INFO"; + case LOGLEVEL_DEBUG: + return "DEBUG"; + } + return "Unknown log level"; +} + +static int mlx5_health_get_severity(u8 rfr_severity) +{ + return rfr_severity & MLX5_SEVERITY_VALID_MASK ? + rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR; +} + +static void print_health_info(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 rfr_severity; + int severity; + int i; + + /* If the syndrome is 0, the device is OK and no need to print buffer */ + if (!ioread8(&h->synd)) + return; + + if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) { + mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n"); + return; + } + + rfr_severity = ioread8(&h->rfr_severity); + severity = mlx5_health_get_severity(rfr_severity); + mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n", + hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity)); + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) + mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i, + ioread32be(h->assert_var + i)); + + mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr)); + mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra)); + mlx5_log(dev, severity, "fw_ver %d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), + fw_rev_sub(dev)); + mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time)); + mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); + mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity)); + mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity)); + mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index)); + mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd), + hsynd_str(ioread8(&h->synd))); + mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); + mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver)); +} + +static int +mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd; + int err; + + synd = ioread8(&h->synd); + err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); + if (err || !synd) + return err; + return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); +} + +struct mlx5_fw_reporter_ctx { + u8 err_synd; + int miss_counter; +}; + +static int +mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg, + struct mlx5_fw_reporter_ctx *fw_reporter_ctx) +{ + int err; + + err = devlink_fmsg_u8_pair_put(fmsg, "syndrome", + fw_reporter_ctx->err_synd); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", + fw_reporter_ctx->miss_counter); + if (err) + return err; + return 0; +} + +static int +mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 rfr_severity; + int err; + int i; + + if (!ioread8(&h->synd)) + return 0; + + err = devlink_fmsg_pair_nest_start(fmsg, "health buffer"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) { + err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i)); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr", + ioread32be(&h->assert_exit_ptr)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra", + ioread32be(&h->assert_callra)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id)); + if (err) + return err; + rfr_severity = ioread8(&h->rfr_severity); + err = devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index", + ioread8(&h->irisc_index)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", + ioread16be(&h->ext_synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", + ioread32be(&h->fw_ver)); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return devlink_fmsg_pair_nest_end(fmsg); +} + +static int +mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + int err; + + err = mlx5_fw_tracer_trigger_core_dump_general(dev); + if (err) + return err; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + return err; + } + + err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg); + if (err) + return err; + return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg); +} + +static void mlx5_fw_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + + health = container_of(work, struct mlx5_core_health, report_work); + + if (IS_ERR_OR_NULL(health->fw_reporter)) + return; + + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + if (fw_reporter_ctx.err_synd) { + devlink_health_report(health->fw_reporter, + "FW syndrome reported", &fw_reporter_ctx); + return; + } + if (fw_reporter_ctx.miss_counter) + devlink_health_report(health->fw_reporter, + "FW miss counter reported", + &fw_reporter_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { + .name = "fw", + .diagnose = mlx5_fw_reporter_diagnose, + .dump = mlx5_fw_reporter_dump, +}; + +static int +mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_health_try_recover(dev); +} + +static int +mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + u32 crdump_size = dev->priv.health.crdump_size; + u32 *cr_data; + int err; + + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + cr_data = kvmalloc(crdump_size, GFP_KERNEL); + if (!cr_data) + return -ENOMEM; + err = mlx5_crdump_collect(dev, cr_data); + if (err) + goto free_data; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + goto free_data; + } + + err = devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size); + +free_data: + kvfree(cr_data); + return err; +} + +static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct devlink *devlink; + struct mlx5_priv *priv; + + health = container_of(work, struct mlx5_core_health, fatal_report_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + devlink = priv_to_devlink(dev); + + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_DROP_HEALTH_WORK, &health->flags)) { + mlx5_core_err(dev, "health works are not permitted at this stage\n"); + mutex_unlock(&dev->intf_state_mutex); + return; + } + mutex_unlock(&dev->intf_state_mutex); + enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + devl_lock(devlink); + if (mlx5_health_try_recover(dev)) + mlx5_core_err(dev, "health recovery failed\n"); + devl_unlock(devlink); + return; + } + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + if (devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) { + /* If recovery wasn't performed, due to grace period, + * unload the driver. This ensures that the driver + * closes all its resources and it is not subjected to + * requests from the kernel. + */ + mlx5_core_err(dev, "Driver is in error state. Unloading\n"); + mlx5_unload_one(dev, false); + } +} + +static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { + .name = "fw_fatal", + .recover = mlx5_fw_fatal_reporter_recover, + .dump = mlx5_fw_fatal_reporter_dump, +}; + +#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 +#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 +#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 +#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD + +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + u64 grace_period; + + if (mlx5_core_is_ecpf(dev)) { + grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD; + } else if (mlx5_core_is_pf(dev)) { + grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD; + } else { + /* VF or SF */ + grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD; + } + + health->fw_reporter = + devl_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, dev); + if (IS_ERR(health->fw_reporter)) + mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", + PTR_ERR(health->fw_reporter)); + + health->fw_fatal_reporter = + devl_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + grace_period, + dev); + if (IS_ERR(health->fw_fatal_reporter)) + mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", + PTR_ERR(health->fw_fatal_reporter)); +} + +static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->fw_reporter)) + devlink_health_reporter_destroy(health->fw_reporter); + + if (!IS_ERR_OR_NULL(health->fw_fatal_reporter)) + devlink_health_reporter_destroy(health->fw_fatal_reporter); +} + +static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev) +{ + unsigned long next; + + get_random_bytes(&next, sizeof(next)); + next %= HZ; + next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL)); + + return next; +} + +void mlx5_trigger_health_work(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!mlx5_dev_is_lightweight(dev)) + queue_work(health->wq, &health->fatal_report_work); +} + +#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60) +static void mlx5_health_log_ts_update(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {}; + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + u64 now_us; + + health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + now_us = ktime_to_us(ktime_get_real()); + + MLX5_SET(mrtc_reg, in, time_h, now_us >> 32); + MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF); + mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1); + + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, + msecs_to_jiffies(MLX5_MSEC_PER_HOUR)); +} + +static void poll_health(struct timer_list *t) +{ + struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 fatal_error; + u8 prev_synd; + u32 count; + + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + fatal_error = mlx5_health_check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; + print_health_info(dev); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mlx5_trigger_health_work(dev); + return; + } + + count = ioread32be(health->health_counter); + if (count == health->prev) + ++health->miss_counter; + else + health->miss_counter = 0; + + health->prev = count; + if (health->miss_counter == MAX_MISSES) { + mlx5_core_err(dev, "device's health compromised - reached miss count\n"); + print_health_info(dev); + queue_work(health->wq, &health->report_work); + } + + prev_synd = health->synd; + health->synd = ioread8(&h->synd); + if (health->synd && health->synd != prev_synd) + queue_work(health->wq, &health->report_work); + +out: + mod_timer(&health->timer, get_next_poll_jiffies(dev)); +} + +void mlx5_start_health_poll(struct mlx5_core_dev *dev) +{ + u64 poll_interval_ms = mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL); + struct mlx5_core_health *health = &dev->priv.health; + + timer_setup(&health->timer, poll_health, 0); + health->fatal_error = MLX5_SENSOR_NO_ERR; + clear_bit(MLX5_DROP_HEALTH_WORK, &health->flags); + health->health = &dev->iseg->health; + health->health_counter = &dev->iseg->health_counter; + + health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); + add_timer(&health->timer); +} + +void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (disable_health) + set_bit(MLX5_DROP_HEALTH_WORK, &health->flags); + + del_timer_sync(&health->timer); +} + +void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); +} + +void mlx5_drain_health_wq(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + set_bit(MLX5_DROP_HEALTH_WORK, &health->flags); + cancel_delayed_work_sync(&health->update_fw_log_ts_work); + cancel_work_sync(&health->report_work); + cancel_work_sync(&health->fatal_report_work); +} + +void mlx5_health_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + cancel_delayed_work_sync(&health->update_fw_log_ts_work); + destroy_workqueue(health->wq); + mlx5_reporter_vnic_destroy(dev); + mlx5_fw_reporters_destroy(dev); +} + +int mlx5_health_init(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_core_health *health; + char *name; + + if (!mlx5_dev_is_lightweight(dev)) { + devl_lock(devlink); + mlx5_fw_reporters_create(dev); + devl_unlock(devlink); + } + mlx5_reporter_vnic_create(dev); + + health = &dev->priv.health; + name = kmalloc(64, GFP_KERNEL); + if (!name) + goto out_err; + + strcpy(name, "mlx5_health"); + strcat(name, dev_name(dev->device)); + health->wq = create_singlethread_workqueue(name); + kfree(name); + if (!health->wq) + goto out_err; + INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); + INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); + INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update); + + return 0; + +out_err: + mlx5_reporter_vnic_destroy(dev); + mlx5_fw_reporters_destroy(dev); + return -ENOMEM; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c new file mode 100644 index 0000000000..353f81dccd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "hwmon.h" + +#define CHANNELS_TYPE_NUM 2 /* chip channel and temp channel */ +#define CHIP_CONFIG_NUM 1 + +/* module 0 is mapped to sensor_index 64 in MTMP register */ +#define to_mtmp_module_sensor_idx(idx) (64 + (idx)) + +/* All temperatures retrieved in units of 0.125C. hwmon framework expect + * it in units of millidegrees C. Hence multiply values by 125. + */ +#define mtmp_temp_to_mdeg(temp) ((temp) * 125) + +struct temp_channel_desc { + u32 sensor_index; + char sensor_name[32]; +}; + +/* chip_channel_config and channel_info arrays must be 0-terminated, hence + 1 */ +struct mlx5_hwmon { + struct mlx5_core_dev *mdev; + struct device *hwmon_dev; + struct hwmon_channel_info chip_info; + u32 chip_channel_config[CHIP_CONFIG_NUM + 1]; + struct hwmon_channel_info temp_info; + u32 *temp_channel_config; + const struct hwmon_channel_info *channel_info[CHANNELS_TYPE_NUM + 1]; + struct hwmon_chip_info chip; + struct temp_channel_desc *temp_channel_desc; + u32 asic_platform_scount; + u32 module_scount; +}; + +static int mlx5_hwmon_query_mtmp(struct mlx5_core_dev *mdev, u32 sensor_index, u32 *mtmp_out) +{ + u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + + MLX5_SET(mtmp_reg, mtmp_in, sensor_index, sensor_index); + + return mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), + mtmp_out, MLX5_ST_SZ_BYTES(mtmp_reg), + MLX5_REG_MTMP, 0, 0); +} + +static int mlx5_hwmon_reset_max_temp(struct mlx5_core_dev *mdev, int sensor_index) +{ + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + + MLX5_SET(mtmp_reg, mtmp_in, sensor_index, sensor_index); + MLX5_SET(mtmp_reg, mtmp_in, mtr, 1); + + return mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), + mtmp_out, sizeof(mtmp_out), + MLX5_REG_MTMP, 0, 0); +} + +static int mlx5_hwmon_enable_max_temp(struct mlx5_core_dev *mdev, int sensor_index) +{ + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + int err; + + err = mlx5_hwmon_query_mtmp(mdev, sensor_index, mtmp_in); + if (err) + return err; + + MLX5_SET(mtmp_reg, mtmp_in, mte, 1); + return mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in), + mtmp_out, sizeof(mtmp_out), + MLX5_REG_MTMP, 0, 1); +} + +static int mlx5_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long *val) +{ + struct mlx5_hwmon *hwmon = dev_get_drvdata(dev); + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + int err; + + if (type != hwmon_temp) + return -EOPNOTSUPP; + + err = mlx5_hwmon_query_mtmp(hwmon->mdev, hwmon->temp_channel_desc[channel].sensor_index, + mtmp_out); + if (err) + return err; + + switch (attr) { + case hwmon_temp_input: + *val = mtmp_temp_to_mdeg(MLX5_GET(mtmp_reg, mtmp_out, temperature)); + return 0; + case hwmon_temp_highest: + *val = mtmp_temp_to_mdeg(MLX5_GET(mtmp_reg, mtmp_out, max_temperature)); + return 0; + case hwmon_temp_crit: + *val = mtmp_temp_to_mdeg(MLX5_GET(mtmp_reg, mtmp_out, temp_threshold_hi)); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int mlx5_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long val) +{ + struct mlx5_hwmon *hwmon = dev_get_drvdata(dev); + + if (type != hwmon_temp || attr != hwmon_temp_reset_history) + return -EOPNOTSUPP; + + return mlx5_hwmon_reset_max_temp(hwmon->mdev, + hwmon->temp_channel_desc[channel].sensor_index); +} + +static umode_t mlx5_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, + int channel) +{ + if (type != hwmon_temp) + return 0; + + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_highest: + case hwmon_temp_crit: + case hwmon_temp_label: + return 0444; + case hwmon_temp_reset_history: + return 0200; + default: + return 0; + } +} + +static int mlx5_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) +{ + struct mlx5_hwmon *hwmon = dev_get_drvdata(dev); + + if (type != hwmon_temp || attr != hwmon_temp_label) + return -EOPNOTSUPP; + + *str = (const char *)hwmon->temp_channel_desc[channel].sensor_name; + return 0; +} + +static const struct hwmon_ops mlx5_hwmon_ops = { + .read = mlx5_hwmon_read, + .read_string = mlx5_hwmon_read_string, + .is_visible = mlx5_hwmon_is_visible, + .write = mlx5_hwmon_write, +}; + +static int mlx5_hwmon_init_channels_names(struct mlx5_hwmon *hwmon) +{ + u32 i; + + for (i = 0; i < hwmon->asic_platform_scount + hwmon->module_scount; i++) { + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {}; + char *sensor_name; + int err; + + err = mlx5_hwmon_query_mtmp(hwmon->mdev, hwmon->temp_channel_desc[i].sensor_index, + mtmp_out); + if (err) + return err; + + sensor_name = MLX5_ADDR_OF(mtmp_reg, mtmp_out, sensor_name_hi); + if (!*sensor_name) { + snprintf(hwmon->temp_channel_desc[i].sensor_name, + sizeof(hwmon->temp_channel_desc[i].sensor_name), "sensor%u", + hwmon->temp_channel_desc[i].sensor_index); + continue; + } + + memcpy(&hwmon->temp_channel_desc[i].sensor_name, sensor_name, + MLX5_FLD_SZ_BYTES(mtmp_reg, sensor_name_hi) + + MLX5_FLD_SZ_BYTES(mtmp_reg, sensor_name_lo)); + } + + return 0; +} + +static int mlx5_hwmon_get_module_sensor_index(struct mlx5_core_dev *mdev, u32 *module_index) +{ + int module_num; + int err; + + err = mlx5_query_module_num(mdev, &module_num); + if (err) + return err; + + *module_index = to_mtmp_module_sensor_idx(module_num); + + return 0; +} + +static int mlx5_hwmon_init_sensors_indexes(struct mlx5_hwmon *hwmon, u64 sensor_map) +{ + DECLARE_BITMAP(smap, BITS_PER_TYPE(sensor_map)); + unsigned long bit_pos; + int err = 0; + int i = 0; + + bitmap_from_u64(smap, sensor_map); + + for_each_set_bit(bit_pos, smap, BITS_PER_TYPE(sensor_map)) { + hwmon->temp_channel_desc[i].sensor_index = bit_pos; + i++; + } + + if (hwmon->module_scount) + err = mlx5_hwmon_get_module_sensor_index(hwmon->mdev, + &hwmon->temp_channel_desc[i].sensor_index); + + return err; +} + +static void mlx5_hwmon_channel_info_init(struct mlx5_hwmon *hwmon) +{ + int i; + + hwmon->channel_info[0] = &hwmon->chip_info; + hwmon->channel_info[1] = &hwmon->temp_info; + + hwmon->chip_channel_config[0] = HWMON_C_REGISTER_TZ; + hwmon->chip_info.config = (const u32 *)hwmon->chip_channel_config; + hwmon->chip_info.type = hwmon_chip; + + for (i = 0; i < hwmon->asic_platform_scount + hwmon->module_scount; i++) + hwmon->temp_channel_config[i] = HWMON_T_INPUT | HWMON_T_HIGHEST | HWMON_T_CRIT | + HWMON_T_RESET_HISTORY | HWMON_T_LABEL; + + hwmon->temp_info.config = (const u32 *)hwmon->temp_channel_config; + hwmon->temp_info.type = hwmon_temp; +} + +static int mlx5_hwmon_is_module_mon_cap(struct mlx5_core_dev *mdev, bool *mon_cap) +{ + u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)]; + u32 module_index; + int err; + + err = mlx5_hwmon_get_module_sensor_index(mdev, &module_index); + if (err) + return err; + + err = mlx5_hwmon_query_mtmp(mdev, module_index, mtmp_out); + if (err) + return err; + + if (MLX5_GET(mtmp_reg, mtmp_out, temperature)) + *mon_cap = true; + + return 0; +} + +static int mlx5_hwmon_get_sensors_count(struct mlx5_core_dev *mdev, u32 *asic_platform_scount) +{ + u32 mtcap_out[MLX5_ST_SZ_DW(mtcap_reg)] = {}; + u32 mtcap_in[MLX5_ST_SZ_DW(mtcap_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, mtcap_in, sizeof(mtcap_in), + mtcap_out, sizeof(mtcap_out), + MLX5_REG_MTCAP, 0, 0); + if (err) + return err; + + *asic_platform_scount = MLX5_GET(mtcap_reg, mtcap_out, sensor_count); + + return 0; +} + +static void mlx5_hwmon_free(struct mlx5_hwmon *hwmon) +{ + if (!hwmon) + return; + + kfree(hwmon->temp_channel_config); + kfree(hwmon->temp_channel_desc); + kfree(hwmon); +} + +static struct mlx5_hwmon *mlx5_hwmon_alloc(struct mlx5_core_dev *mdev) +{ + struct mlx5_hwmon *hwmon; + bool mon_cap = false; + u32 sensors_count; + int err; + + hwmon = kzalloc(sizeof(*mdev->hwmon), GFP_KERNEL); + if (!hwmon) + return ERR_PTR(-ENOMEM); + + err = mlx5_hwmon_get_sensors_count(mdev, &hwmon->asic_platform_scount); + if (err) + goto err_free_hwmon; + + /* check if module sensor has thermal mon cap. if yes, allocate channel desc for it */ + err = mlx5_hwmon_is_module_mon_cap(mdev, &mon_cap); + if (err) + goto err_free_hwmon; + + hwmon->module_scount = mon_cap ? 1 : 0; + sensors_count = hwmon->asic_platform_scount + hwmon->module_scount; + hwmon->temp_channel_desc = kcalloc(sensors_count, sizeof(*hwmon->temp_channel_desc), + GFP_KERNEL); + if (!hwmon->temp_channel_desc) { + err = -ENOMEM; + goto err_free_hwmon; + } + + /* sensors configuration values array, must be 0-terminated hence, + 1 */ + hwmon->temp_channel_config = kcalloc(sensors_count + 1, sizeof(*hwmon->temp_channel_config), + GFP_KERNEL); + if (!hwmon->temp_channel_config) { + err = -ENOMEM; + goto err_free_temp_channel_desc; + } + + hwmon->mdev = mdev; + + return hwmon; + +err_free_temp_channel_desc: + kfree(hwmon->temp_channel_desc); +err_free_hwmon: + kfree(hwmon); + return ERR_PTR(err); +} + +static int mlx5_hwmon_dev_init(struct mlx5_hwmon *hwmon) +{ + u32 mtcap_out[MLX5_ST_SZ_DW(mtcap_reg)] = {}; + u32 mtcap_in[MLX5_ST_SZ_DW(mtcap_reg)] = {}; + int err; + int i; + + err = mlx5_core_access_reg(hwmon->mdev, mtcap_in, sizeof(mtcap_in), + mtcap_out, sizeof(mtcap_out), + MLX5_REG_MTCAP, 0, 0); + if (err) + return err; + + mlx5_hwmon_channel_info_init(hwmon); + mlx5_hwmon_init_sensors_indexes(hwmon, MLX5_GET64(mtcap_reg, mtcap_out, sensor_map)); + err = mlx5_hwmon_init_channels_names(hwmon); + if (err) + return err; + + for (i = 0; i < hwmon->asic_platform_scount + hwmon->module_scount; i++) { + err = mlx5_hwmon_enable_max_temp(hwmon->mdev, + hwmon->temp_channel_desc[i].sensor_index); + if (err) + return err; + } + + hwmon->chip.ops = &mlx5_hwmon_ops; + hwmon->chip.info = (const struct hwmon_channel_info **)hwmon->channel_info; + + return 0; +} + +int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev) +{ + struct device *dev = mdev->device; + struct mlx5_hwmon *hwmon; + int err; + + if (!MLX5_CAP_MCAM_REG(mdev, mtmp)) + return 0; + + hwmon = mlx5_hwmon_alloc(mdev); + if (IS_ERR(hwmon)) + return PTR_ERR(hwmon); + + err = mlx5_hwmon_dev_init(hwmon); + if (err) + goto err_free_hwmon; + + hwmon->hwmon_dev = hwmon_device_register_with_info(dev, "mlx5", + hwmon, + &hwmon->chip, + NULL); + if (IS_ERR(hwmon->hwmon_dev)) { + err = PTR_ERR(hwmon->hwmon_dev); + goto err_free_hwmon; + } + + mdev->hwmon = hwmon; + return 0; + +err_free_hwmon: + mlx5_hwmon_free(hwmon); + return err; +} + +void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev) +{ + struct mlx5_hwmon *hwmon = mdev->hwmon; + + if (!hwmon) + return; + + hwmon_device_unregister(hwmon->hwmon_dev); + mlx5_hwmon_free(hwmon); + mdev->hwmon = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h new file mode 100644 index 0000000000..999654a9b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ +#ifndef __MLX5_HWMON_H__ +#define __MLX5_HWMON_H__ + +#include + +#if IS_ENABLED(CONFIG_HWMON) + +int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev); +void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev); + +#else +static inline int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev) +{ + return 0; +} + +static inline void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev) {} + +#endif + +#endif /* __MLX5_HWMON_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c new file mode 100644 index 0000000000..779d92b762 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" +#include "ipoib.h" +#include "en/fs_ethtool.h" + +static void mlx5i_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_drvinfo(priv, drvinfo); + strscpy(drvinfo->driver, KBUILD_MODNAME "[ib_ipoib]", + sizeof(drvinfo->driver)); +} + +static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_strings(priv, stringset, data); +} + +static int mlx5i_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_sset_count(priv, sset); +} + +static void mlx5i_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); +} + +static int mlx5i_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +static void mlx5i_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_ringparam(priv, param, kernel_param); +} + +static int mlx5i_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5i_priv *ipriv = netdev_priv(dev); + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + + /* rtnl lock protects from race between this ethtool op and sub + * interface ndo_init/uninit. + */ + ASSERT_RTNL(); + if (ipriv->num_sub_interfaces > 0) { + mlx5_core_warn(epriv->mdev, + "can't change number of channels for interfaces with sub interfaces (%u)\n", + ipriv->num_sub_interfaces); + return -EINVAL; + } + + return mlx5e_ethtool_set_channels(epriv, ch); +} + +static void mlx5i_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +static int mlx5i_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); +} + +static int mlx5i_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); +} + +static int mlx5i_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_get_ts_info(priv, info); +} + +static int mlx5i_flash_device(struct net_device *netdev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + +static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width) +{ + switch (width) { + case MLX5_PTYS_WIDTH_1X: return 1; + case MLX5_PTYS_WIDTH_2X: return 2; + case MLX5_PTYS_WIDTH_4X: return 4; + case MLX5_PTYS_WIDTH_8X: return 8; + case MLX5_PTYS_WIDTH_12X: return 12; + default: return -1; + } +} + +enum mlx5_ptys_rate { + MLX5_PTYS_RATE_SDR = 1 << 0, + MLX5_PTYS_RATE_DDR = 1 << 1, + MLX5_PTYS_RATE_QDR = 1 << 2, + MLX5_PTYS_RATE_FDR10 = 1 << 3, + MLX5_PTYS_RATE_FDR = 1 << 4, + MLX5_PTYS_RATE_EDR = 1 << 5, + MLX5_PTYS_RATE_HDR = 1 << 6, + MLX5_PTYS_RATE_NDR = 1 << 7, + MLX5_PTYS_RATE_XDR = 1 << 8, +}; + +static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) +{ + switch (rate) { + case MLX5_PTYS_RATE_SDR: return 2500; + case MLX5_PTYS_RATE_DDR: return 5000; + case MLX5_PTYS_RATE_QDR: + case MLX5_PTYS_RATE_FDR10: return 10000; + case MLX5_PTYS_RATE_FDR: return 14000; + case MLX5_PTYS_RATE_EDR: return 25000; + case MLX5_PTYS_RATE_HDR: return 50000; + case MLX5_PTYS_RATE_NDR: return 100000; + case MLX5_PTYS_RATE_XDR: return 200000; + default: return -1; + } +} + +static u32 mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) +{ + int rate, width; + + rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); + if (rate < 0) + return SPEED_UNKNOWN; + width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); + if (width < 0) + return SPEED_UNKNOWN; + + return rate * width; +} + +static int mlx5i_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 ib_link_width_oper; + u16 ib_proto_oper; + int speed, ret; + + ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper, + 1); + if (ret) + return ret; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = speed == SPEED_UNKNOWN ? DUPLEX_UNKNOWN : DUPLEX_FULL; + + link_ksettings->base.port = PORT_OTHER; + + link_ksettings->base.autoneg = AUTONEG_DISABLE; + + return 0; +} + +static u32 mlx5i_flow_type_mask(u32 flow_type) +{ + return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); +} + +static int mlx5i_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct ethtool_rx_flow_spec *fs = &cmd->fs; + + if (mlx5i_flow_type_mask(fs->flow_type) == ETHER_FLOW) + return -EINVAL; + + return mlx5e_ethtool_set_rxnfc(priv, cmd); +} + +static int mlx5i_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. + */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(priv, info, rule_locs); +} + +const struct ethtool_ops mlx5i_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USE_ADAPTIVE, + .get_drvinfo = mlx5i_get_drvinfo, + .get_strings = mlx5i_get_strings, + .get_sset_count = mlx5i_get_sset_count, + .get_ethtool_stats = mlx5i_get_ethtool_stats, + .get_ringparam = mlx5i_get_ringparam, + .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, + .get_channels = mlx5i_get_channels, + .set_channels = mlx5i_set_channels, + .get_coalesce = mlx5i_get_coalesce, + .set_coalesce = mlx5i_set_coalesce, + .get_ts_info = mlx5i_get_ts_info, + .get_rxnfc = mlx5i_get_rxnfc, + .set_rxnfc = mlx5i_set_rxnfc, + .get_link_ksettings = mlx5i_get_link_ksettings, + .get_link = ethtool_op_get_link, +}; + +const struct ethtool_ops mlx5i_pkey_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ts_info = mlx5i_get_ts_info, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c new file mode 100644 index 0000000000..baa7ef8123 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -0,0 +1,848 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "en.h" +#include "en/params.h" +#include "ipoib.h" +#include "en/fs_ethtool.h" + +#define IB_DEFAULT_Q_KEY 0xb1b +#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9 + +static int mlx5i_open(struct net_device *netdev); +static int mlx5i_close(struct net_device *netdev); +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); + +static const struct net_device_ops mlx5i_netdev_ops = { + .ndo_open = mlx5i_open, + .ndo_stop = mlx5i_close, + .ndo_get_stats64 = mlx5i_get_stats, + .ndo_init = mlx5i_dev_init, + .ndo_uninit = mlx5i_dev_cleanup, + .ndo_change_mtu = mlx5i_change_mtu, + .ndo_eth_ioctl = mlx5i_ioctl, +}; + +/* IPoIB mlx5 netdev profile */ +static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); + + /* RQ size in ipoib by default is 512 */ + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE; + + params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; + params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; + + /* CQE compression is not supported for IPoIB */ + params->rx_cqe_compress_def = false; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); +} + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + netif_carrier_off(netdev); + mlx5e_set_netdev_mtu_boundaries(priv); + netdev->mtu = netdev->max_mtu; + + mlx5e_build_nic_params(priv, NULL, netdev->mtu); + mlx5i_build_nic_params(mdev, &priv->channels.params); + + mlx5e_timestamp_init(priv); + + /* netdev init */ + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + netdev->hw_features |= NETIF_F_RXHASH; + + netdev->netdev_ops = &mlx5i_netdev_ops; + netdev->ethtool_ops = &mlx5i_ethtool_ops; + + return 0; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +void mlx5i_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_priv_cleanup(priv); +} + +static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) +{ + struct rtnl_link_stats64 s = {}; + int i, j; + + for (i = 0; i < priv->stats_nch; i++) { + struct mlx5e_channel_stats *channel_stats; + struct mlx5e_rq_stats *rq_stats; + + channel_stats = priv->channel_stats[i]; + rq_stats = &channel_stats->rq; + + s.rx_packets += rq_stats->packets; + s.rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s.tx_packets += sq_stats->packets; + s.tx_bytes += sq_stats->bytes; + s.tx_dropped += sq_stats->dropped; + } + } + + memset(&priv->stats.sw, 0, sizeof(s)); + + priv->stats.sw.rx_packets = s.rx_packets; + priv->stats.sw.rx_bytes = s.rx_bytes; + priv->stats.sw.tx_packets = s.tx_packets; + priv->stats.sw.tx_bytes = s.tx_bytes; + priv->stats.sw.tx_queue_dropped = s.tx_dropped; +} + +void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; + + mlx5i_grp_sw_update_stats(priv); + + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; + stats->tx_dropped = sstats->tx_queue_dropped; +} + +struct net_device *mlx5i_parent_get(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev); + parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex); + if (!parent_dev) + return NULL; + + parent_ipriv = netdev_priv(parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces++; + + ipriv->parent_dev = parent_dev; + + return parent_dev; +} + +void mlx5i_parent_put(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + + ipriv = priv->ppriv; + parent_ipriv = netdev_priv(ipriv->parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces--; + + dev_put(ipriv->parent_dev); +} + +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + int ret; + + { + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + u32 *qpc; + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, primary_address_path.pkey_index, + ipriv->pkey_index); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1); + MLX5_SET(qpc, qpc, q_key, IB_DEFAULT_Q_KEY); + + MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP); + MLX5_SET(rst2init_qp_in, in, qpn, ipriv->qpn); + ret = mlx5_cmd_exec_in(mdev, rst2init_qp, in); + if (ret) + goto err_qp_modify_to_err; + } + { + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + + MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); + MLX5_SET(init2rtr_qp_in, in, qpn, ipriv->qpn); + ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, in); + if (ret) + goto err_qp_modify_to_err; + } + { + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + + MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP); + MLX5_SET(rtr2rts_qp_in, in, qpn, ipriv->qpn); + ret = mlx5_cmd_exec_in(mdev, rtr2rts_qp, in); + if (ret) + goto err_qp_modify_to_err; + } + return 0; + +err_qp_modify_to_err: + { + u32 in[MLX5_ST_SZ_DW(qp_2err_in)] = {}; + + MLX5_SET(qp_2err_in, in, opcode, MLX5_CMD_OP_2ERR_QP); + MLX5_SET(qp_2err_in, in, qpn, ipriv->qpn); + mlx5_cmd_exec_in(mdev, qp_2err, in); + } + return ret; +} + +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {}; + + MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP); + MLX5_SET(qp_2rst_in, in, qpn, ipriv->qpn); + mlx5_cmd_exec_in(mdev, qp_2rst, in); +} + +#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 + +int mlx5i_create_underlay_qp(struct mlx5e_priv *priv) +{ + const unsigned char *dev_addr = priv->netdev->dev_addr; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {}; + struct mlx5i_priv *ipriv = priv->ppriv; + void *addr_path; + int qpn = 0; + int ret = 0; + void *qpc; + + if (MLX5_CAP_GEN(priv->mdev, mkey_by_name)) { + qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3]; + MLX5_SET(create_qp_in, in, input_qpn, qpn); + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(priv->mdev)); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, + MLX5_QP_ENHANCED_ULP_STATELESS_MODE); + + addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, addr_path, vhca_port_num, 1); + MLX5_SET(ads, addr_path, grh, 1); + + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + ret = mlx5_cmd_exec_inout(priv->mdev, create_qp, in, out); + if (ret) + return ret; + + ipriv->qpn = MLX5_GET(create_qp_out, out, qpn); + + return 0; +} + +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, qpn); + mlx5_cmd_exec_in(mdev, destroy_qp, in); +} + +int mlx5i_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, true, true); +} + +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); + + return mlx5e_create_tis(mdev, in, tisn); +} + +static int mlx5i_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err); + return err; + } + + err = mlx5i_create_tis(priv->mdev, ipriv->qpn, &priv->tisn[0][0]); + if (err) { + mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); + goto err_destroy_underlay_qp; + } + + return 0; + +err_destroy_underlay_qp: + mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn); + return err; +} + +static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5e_destroy_tis(priv->mdev, priv->tisn[0][0]); + mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn); +} + +static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) +{ + struct mlx5_flow_namespace *ns = + mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); + int err; + + + if (!ns) + return -EINVAL; + + mlx5e_fs_set_ns(priv->fs, ns, false); + err = mlx5e_arfs_create_tables(priv->fs, priv->rx_res, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } + + err = mlx5e_create_ttc_table(priv->fs, priv->rx_res); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + + mlx5e_ethtool_init_steering(priv->fs); + + return 0; + +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv->fs, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); + + return err; +} + +static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) +{ + mlx5e_destroy_ttc_table(priv->fs); + mlx5e_arfs_destroy_tables(priv->fs, + !!(priv->netdev->hw_features & NETIF_F_NTUPLE)); + mlx5e_ethtool_cleanup_steering(priv->fs); +} + +static int mlx5i_init_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + priv->fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); + if (!priv->fs) { + netdev_err(priv->netdev, "FS allocation failed\n"); + return -ENOMEM; + } + + priv->rx_res = mlx5e_rx_res_alloc(); + if (!priv->rx_res) { + err = -ENOMEM; + goto err_free_fs; + } + + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_q_counters; + } + + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, + priv->channels.params.num_channels); + if (err) + goto err_close_drop_rq; + + err = mlx5i_create_flow_steering(priv); + if (err) + goto err_destroy_rx_res; + + return 0; + +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; +err_free_fs: + mlx5e_fs_cleanup(priv->fs); + return err; +} + +static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) +{ + mlx5i_destroy_flow_steering(priv); + mlx5e_rx_res_destroy(priv->rx_res); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); + mlx5e_rx_res_free(priv->rx_res); + priv->rx_res = NULL; + mlx5e_fs_cleanup(priv->fs); +} + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5i_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), +}; + +static unsigned int mlx5i_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5i_stats_grps); +} + +static const struct mlx5e_profile mlx5i_nic_profile = { + .init = mlx5i_init, + .cleanup = mlx5i_cleanup, + .init_tx = mlx5i_init_tx, + .cleanup_tx = mlx5i_cleanup_tx, + .init_rx = mlx5i_init_rx, + .cleanup_rx = mlx5i_cleanup_rx, + .enable = NULL, /* mlx5i_enable */ + .disable = NULL, /* mlx5i_disable */ + .update_rx = mlx5i_update_nic_rx, + .update_stats = NULL, /* mlx5i_update_stats */ + .update_carrier = NULL, /* no HW update in IB link */ + .rx_handlers = &mlx5i_rx_handlers, + .max_tc = MLX5I_MAX_NUM_TC, + .stats_grps = mlx5i_stats_grps, + .stats_grps_num = mlx5i_stats_grps_num, +}; + +/* mlx5i netdev NDos */ + +static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_params new_params; + int err = 0; + + mutex_lock(&priv->state_lock); + + new_params = priv->channels.params; + new_params.sw_mtu = new_mtu; + + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + if (err) + goto out; + + netdev->mtu = new_params.sw_mtu; + +out: + mutex_unlock(&priv->state_lock); + return err; +} + +int mlx5i_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; + u8 addr_mod[3]; + + /* Set dev address using underlay QP */ + addr_mod[0] = (ipriv->qpn >> 16) & 0xff; + addr_mod[1] = (ipriv->qpn >> 8) & 0xff; + addr_mod[2] = (ipriv->qpn) & 0xff; + dev_addr_mod(dev, 1, addr_mod, sizeof(addr_mod)); + + /* Add QPN to net-device mapping to HT */ + mlx5i_pkey_add_qpn(dev, ipriv->qpn); + + return 0; +} + +int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlx5e_hwstamp_set(priv, ifr); + case SIOCGHWTSTAMP: + return mlx5e_hwstamp_get(priv, ifr); + default: + return -EOPNOTSUPP; + } +} + +void mlx5i_dev_cleanup(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_uninit_underlay_qp(priv); + + /* Delete QPN to net-device mapping from HT */ + mlx5i_pkey_del_qpn(dev, ipriv->qpn); +} + +static int mlx5i_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err); + goto err_clear_state_opened_flag; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn); + if (err) { + mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err); + goto err_reset_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) + goto err_remove_fs_underlay_qp; + + err = epriv->profile->update_rx(epriv); + if (err) + goto err_close_channels; + + mlx5e_activate_priv_channels(epriv); + + mutex_unlock(&epriv->state_lock); + return 0; + +err_close_channels: + mlx5e_close_channels(&epriv->channels); +err_remove_fs_underlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn); +err_reset_qp: + mlx5i_uninit_underlay_qp(epriv); +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_close(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. + */ + mutex_lock(&epriv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &epriv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + + netif_carrier_off(epriv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn); + mlx5e_deactivate_priv_channels(epriv); + mlx5e_close_channels(&epriv->channels); + mlx5i_uninit_underlay_qp(epriv); +unlock: + mutex_unlock(&epriv->state_lock); + return 0; +} + +/* IPoIB RDMA netdev callbacks */ +static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid, int set_qkey, + u32 qkey) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qpn, + gid->raw); + err = mlx5_core_attach_mcg(mdev, gid, ipriv->qpn); + if (err) + mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n", + ipriv->qpn, gid->raw); + + if (set_qkey) { + mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n", + netdev->name, qkey); + ipriv->qkey = qkey; + } + + return err; +} + +static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qpn, + gid->raw); + + err = mlx5_core_detach_mcg(mdev, gid, ipriv->qpn); + if (err) + mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n", + ipriv->qpn, gid->raw); + + return err; +} + +static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, + struct ib_ah *address, u32 dqpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)]; + struct mlx5_ib_ah *mah = to_mah(address); + struct mlx5i_priv *ipriv = epriv->ppriv; + + mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more()); + + return NETDEV_TX_OK; +} + +static void mlx5i_set_pkey_index(struct net_device *netdev, int id) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + ipriv->pkey_index = (u16)id; +} + +static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + return -EOPNOTSUPP; + + if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) { + mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static void mlx5_rdma_netdev_free(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + const struct mlx5e_profile *profile = priv->profile; + + mlx5e_detach_netdev(priv); + profile->cleanup(priv); + + if (!ipriv->sub_interface) { + mlx5i_pkey_qpn_ht_cleanup(netdev); + mlx5e_destroy_mdev_resources(mdev); + } +} + +static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev) +{ + return mdev->mlx5e_res.hw_objs.pdn != 0; +} + +static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev) +{ + if (mlx5_is_sub_interface(mdev)) + return mlx5i_pkey_get_profile(); + return &mlx5i_nic_profile; +} + +static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num, + struct net_device *netdev, void *param) +{ + struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param; + const struct mlx5e_profile *prof = mlx5_get_profile(mdev); + struct mlx5i_priv *ipriv; + struct mlx5e_priv *epriv; + struct rdma_netdev *rn; + int err; + + ipriv = netdev_priv(netdev); + epriv = mlx5i_epriv(netdev); + + ipriv->sub_interface = mlx5_is_sub_interface(mdev); + if (!ipriv->sub_interface) { + err = mlx5i_pkey_qpn_ht_init(netdev); + if (err) { + mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); + return err; + } + + /* This should only be called once per mdev */ + err = mlx5e_create_mdev_resources(mdev); + if (err) + goto destroy_ht; + } + + err = mlx5e_priv_init(epriv, prof, netdev, mdev); + if (err) + goto destroy_mdev_resources; + + epriv->profile = prof; + epriv->ppriv = ipriv; + + prof->init(mdev, netdev); + + err = mlx5e_attach_netdev(epriv); + if (err) + goto detach; + netif_carrier_off(netdev); + + /* set rdma_netdev func pointers */ + rn = &ipriv->rn; + rn->hca = ibdev; + rn->send = mlx5i_xmit; + rn->attach_mcast = mlx5i_attach_mcast; + rn->detach_mcast = mlx5i_detach_mcast; + rn->set_id = mlx5i_set_pkey_index; + + netdev->priv_destructor = mlx5_rdma_netdev_free; + netdev->needs_free_netdev = 1; + + return 0; + +detach: + prof->cleanup(epriv); + if (ipriv->sub_interface) + return err; +destroy_mdev_resources: + mlx5e_destroy_mdev_resources(mdev); +destroy_ht: + mlx5i_pkey_qpn_ht_cleanup(netdev); + return err; +} + +int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, + struct ib_device *device, + struct rdma_netdev_alloc_params *params) +{ + int nch; + int rc; + + rc = mlx5i_check_required_hca_cap(mdev); + if (rc) + return rc; + + nch = mlx5e_get_max_num_channels(mdev); + + *params = (struct rdma_netdev_alloc_params){ + .sizeof_priv = sizeof(struct mlx5i_priv) + + sizeof(struct mlx5e_priv), + .txqs = nch * MLX5E_MAX_NUM_TC, + .rxqs = nch, + .param = mdev, + .initialize_rdma_netdev = mlx5_rdma_setup_rn, + }; + + return 0; +} +EXPORT_SYMBOL(mlx5_rdma_rn_get_params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h new file mode 100644 index 0000000000..f3f2af9720 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_IPOB_H__ +#define __MLX5E_IPOB_H__ + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#include +#include "en.h" + +#define MLX5I_MAX_NUM_TC 1 + +extern const struct ethtool_ops mlx5i_ethtool_ops; +extern const struct ethtool_ops mlx5i_pkey_ethtool_ops; +extern const struct mlx5e_rx_handlers mlx5i_rx_handlers; + +#define MLX5_IB_GRH_BYTES 40 +#define MLX5_IPOIB_ENCAP_LEN 4 +#define MLX5_IPOIB_PSEUDO_LEN 20 +#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN) + +/* ipoib rdma netdev's private data structure */ +struct mlx5i_priv { + struct rdma_netdev rn; /* keep this first */ + u32 qpn; + bool sub_interface; + u32 num_sub_interfaces; + u32 qkey; + u16 pkey_index; + struct mlx5i_pkey_qpn_ht *qpn_htbl; + struct net_device *parent_dev; + char *mlx5e_priv[]; +}; + +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn); + +/* Underlay QP create/destroy functions */ +int mlx5i_create_underlay_qp(struct mlx5e_priv *priv); +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn); + +/* Underlay QP state modification init/uninit functions */ +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv); +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv); + +/* Allocate/Free underlay QPN to net-device hash table */ +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev); +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev); + +/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */ +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn); +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn); + +/* Get the net-device corresponding to the given underlay QPN */ +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn); + +/* Shared ndo functions */ +int mlx5i_dev_init(struct net_device *dev); +void mlx5i_dev_cleanup(struct net_device *dev); +int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); + +/* Parent profile functions */ +int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev); +void mlx5i_cleanup(struct mlx5e_priv *priv); + +int mlx5i_update_nic_rx(struct mlx5e_priv *priv); + +/* Get child interface nic profile */ +const struct mlx5e_profile *mlx5i_pkey_get_profile(void); + +/* Extract mlx5e_priv from IPoIB netdev */ +#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) + +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; + +struct mlx5i_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_datagram_seg datagram; + struct mlx5_wqe_eth_pad pad; + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[]; +}; + +#define MLX5I_SQ_FETCH_WQE(sq, pi) \ + ((struct mlx5i_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5i_tx_wqe))) + +void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more); +void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); + +/* Reference management for child to parent interfaces. */ +struct net_device *mlx5i_parent_get(struct net_device *netdev); +void mlx5i_parent_put(struct net_device *netdev); + +#endif /* CONFIG_MLX5_CORE_IPOIB */ +#endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c new file mode 100644 index 0000000000..03e6812979 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "ipoib.h" + +#define MLX5I_MAX_LOG_PKEY_SUP 7 + +struct qpn_to_netdev { + struct net_device *netdev; + struct hlist_node hlist; + u32 underlay_qpn; +}; + +struct mlx5i_pkey_qpn_ht { + struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP]; + spinlock_t ht_lock; /* Synchronise with NAPI */ +}; + +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *qpn_htbl; + + qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL); + if (!qpn_htbl) + return -ENOMEM; + + ipriv->qpn_htbl = qpn_htbl; + spin_lock_init(&qpn_htbl->ht_lock); + + return 0; +} + +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + kfree(ipriv->qpn_htbl); +} + +static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets, + u32 qpn) +{ + struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)]; + struct qpn_to_netdev *node; + + hlist_for_each_entry(node, h, hlist) { + if (node->underlay_qpn == qpn) + return node; + } + + return NULL; +} + +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP); + struct qpn_to_netdev *new_node; + + new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + + new_node->netdev = netdev; + new_node->underlay_qpn = qpn; + spin_lock_bh(&ht->ht_lock); + hlist_add_head(&new_node->hlist, &ht->buckets[key]); + spin_unlock_bh(&ht->ht_lock); + + return 0; +} + +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn); + if (!node) { + mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n"); + return -EINVAL; + } + + spin_lock_bh(&ht->ht_lock); + hlist_del_init(&node->hlist); + spin_unlock_bh(&ht->ht_lock); + kfree(node); + + return 0; +} + +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn); + if (!node) + return NULL; + + return node->netdev; +} + +static int mlx5i_pkey_open(struct net_device *netdev); +static int mlx5i_pkey_close(struct net_device *netdev); +static int mlx5i_pkey_dev_init(struct net_device *dev); +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev); +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu); +static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); + +static const struct net_device_ops mlx5i_pkey_netdev_ops = { + .ndo_open = mlx5i_pkey_open, + .ndo_stop = mlx5i_pkey_close, + .ndo_init = mlx5i_pkey_dev_init, + .ndo_get_stats64 = mlx5i_get_stats, + .ndo_uninit = mlx5i_pkey_dev_cleanup, + .ndo_change_mtu = mlx5i_pkey_change_mtu, + .ndo_eth_ioctl = mlx5i_pkey_ioctl, +}; + +/* Child NDOs */ +static int mlx5i_pkey_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + + ipriv = priv->ppriv; + + /* Link to parent */ + parent_dev = mlx5i_parent_get(dev); + if (!parent_dev) { + mlx5_core_warn(priv->mdev, "failed to get parent device\n"); + return -EINVAL; + } + + if (dev->num_rx_queues < parent_dev->real_num_rx_queues) { + mlx5_core_warn(priv->mdev, + "failed to create child device with rx queues [%d] less than parent's [%d]\n", + dev->num_rx_queues, + parent_dev->real_num_rx_queues); + mlx5i_parent_put(dev); + return -EINVAL; + } + + /* Get QPN to netdevice hash table from parent */ + parent_ipriv = netdev_priv(parent_dev); + ipriv->qpn_htbl = parent_ipriv->qpn_htbl; + + return mlx5i_dev_init(dev); +} + +static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + return mlx5i_ioctl(dev, ifr, cmd); +} + +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) +{ + mlx5i_parent_put(netdev); + return mlx5i_dev_cleanup(netdev); +} + +static int mlx5i_pkey_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err); + goto err_release_lock; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn); + if (err) { + mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err); + goto err_unint_underlay_qp; + } + + err = mlx5i_create_tis(mdev, ipriv->qpn, &epriv->tisn[0][0]); + if (err) { + mlx5_core_warn(mdev, "create child tis failed, %d\n", err); + goto err_remove_rx_uderlay_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) { + mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); + goto err_clear_state_opened_flag; + } + err = epriv->profile->update_rx(epriv); + if (err) + goto err_close_channels; + mlx5e_activate_priv_channels(epriv); + mutex_unlock(&epriv->state_lock); + + return 0; + +err_close_channels: + mlx5e_close_channels(&epriv->channels); +err_clear_state_opened_flag: + mlx5e_destroy_tis(mdev, epriv->tisn[0][0]); +err_remove_rx_uderlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn); +err_unint_underlay_qp: + mlx5i_uninit_underlay_qp(epriv); +err_release_lock: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_pkey_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn); + mlx5i_uninit_underlay_qp(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + mlx5e_destroy_tis(mdev, priv->tisn[0][0]); +unlock: + mutex_unlock(&priv->state_lock); + return 0; +} + +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mutex_lock(&priv->state_lock); + netdev->mtu = new_mtu; + mutex_unlock(&priv->state_lock); + + return 0; +} + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +static int mlx5i_pkey_init(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + int err; + + err = mlx5i_init(mdev, netdev); + if (err) + return err; + + /* Override parent ndo */ + netdev->netdev_ops = &mlx5i_pkey_netdev_ops; + + /* Set child limited ethtool support */ + netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops; + + /* Use dummy rqs */ + priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + + return 0; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv) +{ + mlx5i_cleanup(priv); +} + +static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5i_create_underlay_qp(priv); + if (err) + mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err); + + return err; +} + +static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn); +} + +static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource allocation and init + */ + return 0; +} + +static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource free and de-init + */ +} + +static const struct mlx5e_profile mlx5i_pkey_nic_profile = { + .init = mlx5i_pkey_init, + .cleanup = mlx5i_pkey_cleanup, + .init_tx = mlx5i_pkey_init_tx, + .cleanup_tx = mlx5i_pkey_cleanup_tx, + .init_rx = mlx5i_pkey_init_rx, + .cleanup_rx = mlx5i_pkey_cleanup_rx, + .enable = NULL, + .disable = NULL, + .update_rx = mlx5i_update_nic_rx, + .update_stats = NULL, + .rx_handlers = &mlx5i_rx_handlers, + .max_tc = MLX5I_MAX_NUM_TC, +}; + +const struct mlx5e_profile *mlx5i_pkey_get_profile(void) +{ + return &mlx5i_pkey_nic_profile; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c new file mode 100644 index 0000000000..612e666ec2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "pci_irq.h" + +static void cpu_put(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]--; +} + +static void cpu_get(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]++; +} + +/* Gets the least loaded CPU. e.g.: the CPU with least IRQs bound to it */ +static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask) +{ + int best_cpu = -1; + int cpu; + + for_each_cpu_and(cpu, req_mask, cpu_online_mask) { + /* CPU has zero IRQs on it. No need to search any more CPUs. */ + if (!pool->irqs_per_cpu[cpu]) { + best_cpu = cpu; + break; + } + if (best_cpu < 0) + best_cpu = cpu; + if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu]) + best_cpu = cpu; + } + if (best_cpu == -1) { + /* There isn't online CPUs in req_mask */ + mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n", + cpumask_pr_args(req_mask)); + best_cpu = cpumask_first(cpu_online_mask); + } + pool->irqs_per_cpu[best_cpu]++; + return best_cpu; +} + +/* Creating an IRQ from irq_pool */ +static struct mlx5_irq * +irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) +{ + struct irq_affinity_desc auto_desc = {}; + u32 irq_index; + int err; + + err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); + if (err) + return ERR_PTR(err); + if (pool->irqs_per_cpu) { + if (cpumask_weight(&af_desc->mask) > 1) + /* if req_mask contain more then one CPU, set the least loadad CPU + * of req_mask + */ + cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask), + &auto_desc.mask); + else + cpu_get(pool, cpumask_first(&af_desc->mask)); + } + return mlx5_irq_alloc(pool, irq_index, + cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + NULL); +} + +/* Looking for the IRQ with the smallest refcount that fits req_mask. + * If pool is sf_comp_pool, then we are looking for an IRQ with any of the + * requested CPUs in req_mask. + * for example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask + * isn't subset of req_mask, so we will skip it. irq1_mask is subset of req_mask, + * we don't skip it. + * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will + * fit. And since mask is subset of itself, we will pass the first if bellow. + */ +static struct mlx5_irq * +irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + int start = pool->xa_num_irqs.min; + int end = pool->xa_num_irqs.max; + struct mlx5_irq *irq = NULL; + struct mlx5_irq *iter; + int irq_refcount = 0; + unsigned long index; + + lockdep_assert_held(&pool->lock); + xa_for_each_range(&pool->irqs, index, iter, start, end) { + struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter); + int iter_refcount = mlx5_irq_read_locked(iter); + + if (!cpumask_subset(iter_mask, req_mask)) + /* skip IRQs with a mask which is not subset of req_mask */ + continue; + if (iter_refcount < pool->min_threshold) + /* If we found an IRQ with less than min_thres, return it */ + return iter; + if (!irq || iter_refcount < irq_refcount) { + /* In case we won't find an IRQ with less than min_thres, + * keep a pointer to the least used IRQ + */ + irq_refcount = iter_refcount; + irq = iter; + } + } + return irq; +} + +/** + * mlx5_irq_affinity_request - request an IRQ according to the given mask. + * @pool: IRQ pool to request from. + * @af_desc: affinity descriptor for this IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. + */ +struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) +{ + struct mlx5_irq *least_loaded_irq, *new_irq; + + mutex_lock(&pool->lock); + least_loaded_irq = irq_pool_find_least_loaded(pool, &af_desc->mask); + if (least_loaded_irq && + mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold) + goto out; + /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */ + new_irq = irq_pool_request_irq(pool, af_desc); + if (IS_ERR(new_irq)) { + if (!least_loaded_irq) { + /* We failed to create an IRQ and we didn't find an IRQ */ + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); + mutex_unlock(&pool->lock); + return new_irq; + } + /* We failed to create a new IRQ for the requested affinity, + * sharing existing IRQ. + */ + goto out; + } + least_loaded_irq = new_irq; + goto unlock; +out: + mlx5_irq_get_locked(least_loaded_irq); + if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold) + mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", + pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(least_loaded_irq)), pool->name, + mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ); +unlock: + mutex_unlock(&pool->lock); + return least_loaded_irq; +} + +void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + int cpu; + + cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); + synchronize_irq(pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(irq))); + if (mlx5_irq_put(irq)) + if (pool->irqs_per_cpu) + cpu_put(pool, cpu); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c new file mode 100644 index 0000000000..f4b777d4e1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "lag.h" + +static char *get_str_mode_type(struct mlx5_lag *ldev) +{ + switch (ldev->mode) { + case MLX5_LAG_MODE_ROCE: return "roce"; + case MLX5_LAG_MODE_SRIOV: return "switchdev"; + case MLX5_LAG_MODE_MULTIPATH: return "multipath"; + case MLX5_LAG_MODE_MPESW: return "multiport_eswitch"; + default: return "invalid"; + } + + return NULL; +} + +static int type_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + char *mode = NULL; + + ldev = mlx5_lag_dev(dev); + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = get_str_mode_type(ldev); + mutex_unlock(&ldev->lock); + if (!mode) + return -EINVAL; + seq_printf(file, "%s\n", mode); + + return 0; +} + +static int port_sel_mode_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int ret = 0; + char *mode; + + ldev = mlx5_lag_dev(dev); + mutex_lock(&ldev->lock); + if (__mlx5_lag_is_active(ldev)) + mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags); + else + ret = -EINVAL; + mutex_unlock(&ldev->lock); + if (ret) + return ret; + + seq_printf(file, "%s\n", mode); + return 0; +} + +static int state_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + bool active; + + ldev = mlx5_lag_dev(dev); + mutex_lock(&ldev->lock); + active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + seq_printf(file, "%s\n", active ? "active" : "disabled"); + return 0; +} + +static int flags_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + bool fdb_sel_mode_native; + struct mlx5_lag *ldev; + bool shared_fdb; + bool lag_active; + + ldev = mlx5_lag_dev(dev); + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (!lag_active) + goto unlock; + + shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &ldev->mode_flags); + +unlock: + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off"); + seq_printf(file, "%s:%s\n", "fdb_selection_mode", + fdb_sel_mode_native ? "native" : "affinity"); + return 0; +} + +static int mapping_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + u8 ports[MLX5_MAX_PORTS] = {}; + struct mlx5_lag *ldev; + bool hash = false; + bool lag_active; + int num_ports; + int i; + + ldev = mlx5_lag_dev(dev); + mutex_lock(&ldev->lock); + lag_active = __mlx5_lag_is_active(ldev); + if (lag_active) { + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { + mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports, + &num_ports); + hash = true; + } else { + for (i = 0; i < ldev->ports; i++) + ports[i] = ldev->v2p_map[i]; + num_ports = ldev->ports; + } + } + mutex_unlock(&ldev->lock); + if (!lag_active) + return -EINVAL; + + for (i = 0; i < num_ports; i++) { + if (hash) + seq_printf(file, "%d\n", ports[i] + 1); + else + seq_printf(file, "%d:%d\n", i + 1, ports[i]); + } + + return 0; +} + +static int members_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + struct mlx5_lag *ldev; + int i; + + ldev = mlx5_lag_dev(dev); + mutex_lock(&ldev->lock); + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); + } + mutex_unlock(&ldev->lock); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(type); +DEFINE_SHOW_ATTRIBUTE(port_sel_mode); +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(flags); +DEFINE_SHOW_ATTRIBUTE(mapping); +DEFINE_SHOW_ATTRIBUTE(members); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev) +{ + struct dentry *dbg; + + dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev)); + dev->priv.dbg.lag_debugfs = dbg; + + debugfs_create_file("type", 0444, dbg, dev, &type_fops); + debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops); + debugfs_create_file("state", 0444, dbg, dev, &state_fops); + debugfs_create_file("flags", 0444, dbg, dev, &flags_fops); + debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops); + debugfs_create_file("members", 0444, dbg, dev, &members_fops); +} + +void mlx5_ldev_remove_debugfs(struct dentry *dbg) +{ + debugfs_remove_recursive(dbg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c new file mode 100644 index 0000000000..af3fac090b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -0,0 +1,1620 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "lib/devcom.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "esw/acl/ofld.h" +#include "lag.h" +#include "mp.h" +#include "mpesw.h" + +enum { + MLX5_LAG_EGRESS_PORT_1 = 1, + MLX5_LAG_EGRESS_PORT_2, +}; + +/* General purpose, use for short periods of time. + * Beware of lock dependencies (preferably, no locks should be acquired + * under it). + */ +static DEFINE_SPINLOCK(lag_lock); + +static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) +{ + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT; + + if (mode == MLX5_LAG_MODE_MPESW) + return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW; + + return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY; +} + +static u8 lag_active_port_bits(struct mlx5_lag *ldev) +{ + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + u8 active_port = 0; + int num_enabled; + int idx; + + mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports, + &num_enabled); + for (idx = 0; idx < num_enabled; idx++) + active_port |= BIT_MASK(enabled_ports[idx]); + + return active_port; +} + +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, + unsigned long flags) +{ + bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &flags); + int port_sel_mode = get_port_sel_mode(mode, flags); + u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; + void *lag_ctx; + + lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); + MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); + + switch (port_sel_mode) { + case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + break; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: + if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass)) + break; + + MLX5_SET(lagc, lag_ctx, active_port, + lag_active_port_bits(mlx5_lag_dev(dev))); + break; + default: + break; + } + MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode); + + return mlx5_cmd_exec_in(dev, create_lag, in); +} + +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports, + u8 *ports) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; + void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x1); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + + return mlx5_cmd_exec_in(dev, modify_lag, in); +} + +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {}; + + MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); + + return mlx5_cmd_exec_in(dev, create_vport_lag, in); +} +EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); + +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {}; + + MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); + + return mlx5_cmd_exec_in(dev, destroy_vport_lag, in); +} +EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); + +static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_disabled) +{ + int i; + + *num_disabled = 0; + for (i = 0; i < num_ports; i++) { + if (!tracker->netdev_state[i].tx_enabled || + !tracker->netdev_state[i].link_up) + ports[(*num_disabled)++] = i; + } +} + +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled) +{ + int i; + + *num_enabled = 0; + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + ports[(*num_enabled)++] = i; + } + + if (*num_enabled == 0) + mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); +} + +static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, + struct mlx5_lag *ldev, + struct lag_tracker *tracker, + unsigned long flags) +{ + char buf[MLX5_MAX_PORTS * 10 + 1] = {}; + u8 enabled_ports[MLX5_MAX_PORTS] = {}; + int written = 0; + int num_enabled; + int idx; + int err; + int i; + int j; + + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { + mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, + &num_enabled); + for (i = 0; i < num_enabled; i++) { + err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); + if (err != 3) + return; + written += err; + } + buf[written - 2] = 0; + mlx5_core_info(dev, "lag map active ports: %s\n", buf); + } else { + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + err = scnprintf(buf + written, 10, + " port %d:%d", i + 1, ldev->v2p_map[idx]); + if (err != 9) + return; + written += err; + } + } + mlx5_core_info(dev, "lag map:%s\n", buf); + } +} + +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr); +static void mlx5_do_bond_work(struct work_struct *work); + +static void mlx5_ldev_free(struct kref *ref) +{ + struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); + + if (ldev->nb.notifier_call) + unregister_netdevice_notifier_net(&init_net, &ldev->nb); + mlx5_lag_mp_cleanup(ldev); + cancel_delayed_work_sync(&ldev->bond_work); + destroy_workqueue(ldev->wq); + mutex_destroy(&ldev->lock); + kfree(ldev); +} + +static void mlx5_ldev_put(struct mlx5_lag *ldev) +{ + kref_put(&ldev->ref, mlx5_ldev_free); +} + +static void mlx5_ldev_get(struct mlx5_lag *ldev) +{ + kref_get(&ldev->ref); +} + +static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + int err; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return NULL; + + ldev->wq = create_singlethread_workqueue("mlx5_lag"); + if (!ldev->wq) { + kfree(ldev); + return NULL; + } + + kref_init(&ldev->ref); + mutex_init(&ldev->lock); + INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + + ldev->nb.notifier_call = mlx5_lag_netdev_event; + if (register_netdevice_notifier_net(&init_net, &ldev->nb)) { + ldev->nb.notifier_call = NULL; + mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); + } + ldev->mode = MLX5_LAG_MODE_NONE; + + err = mlx5_lag_mp_init(ldev); + if (err) + mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", + err); + + ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); + ldev->buckets = 1; + + return ldev; +} + +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) + if (ldev->pf[i].netdev == ndev) + return i; + + return -ENOENT; +} + +static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) +{ + return ldev->mode == MLX5_LAG_MODE_ROCE; +} + +static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) +{ + return ldev->mode == MLX5_LAG_MODE_SRIOV; +} + +/* Create a mapping between steering slots and active ports. + * As we have ldev->buckets slots per port first assume the native + * mapping should be used. + * If there are ports that are disabled fill the relevant slots + * with mapping that points to active ports. + */ +static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, + u8 num_ports, + u8 buckets, + u8 *ports) +{ + int disabled[MLX5_MAX_PORTS] = {}; + int enabled[MLX5_MAX_PORTS] = {}; + int disabled_ports_num = 0; + int enabled_ports_num = 0; + int idx; + u32 rand; + int i; + int j; + + for (i = 0; i < num_ports; i++) { + if (tracker->netdev_state[i].tx_enabled && + tracker->netdev_state[i].link_up) + enabled[enabled_ports_num++] = i; + else + disabled[disabled_ports_num++] = i; + } + + /* Use native mapping by default where each port's buckets + * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc + */ + for (i = 0; i < num_ports; i++) + for (j = 0; j < buckets; j++) { + idx = i * buckets + j; + ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; + } + + /* If all ports are disabled/enabled keep native mapping */ + if (enabled_ports_num == num_ports || + disabled_ports_num == num_ports) + return; + + /* Go over the disabled ports and for each assign a random active port */ + for (i = 0; i < disabled_ports_num; i++) { + for (j = 0; j < buckets; j++) { + get_random_bytes(&rand, 4); + ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1; + } + } +} + +static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) + if (ldev->pf[i].has_drop) + return true; + return false; +} + +static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].has_drop) + continue; + + mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch, + MLX5_VPORT_UPLINK); + ldev->pf[i].has_drop = false; + } +} + +static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + u8 disabled_ports[MLX5_MAX_PORTS] = {}; + struct mlx5_core_dev *dev; + int disabled_index; + int num_disabled; + int err; + int i; + + /* First delete the current drop rule so there won't be any dropped + * packets + */ + mlx5_lag_drop_rule_cleanup(ldev); + + if (!ldev->tracker.has_inactive) + return; + + mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled); + + for (i = 0; i < num_disabled; i++) { + disabled_index = disabled_ports[i]; + dev = ldev->pf[disabled_index].dev; + err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, + MLX5_VPORT_UPLINK); + if (!err) + ldev->pf[disabled_index].has_drop = true; + else + mlx5_core_err(dev, + "Failed to create lag drop rule, error: %d", err); + } +} + +static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; + void *lag_ctx; + + lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x2); + + MLX5_SET(lagc, lag_ctx, active_port, ports); + + return mlx5_cmd_exec_in(dev, modify_lag, in); +} + +static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + u8 active_ports; + int ret; + + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { + ret = mlx5_lag_port_sel_modify(ldev, ports); + if (ret || + !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass)) + return ret; + + active_ports = lag_active_port_bits(ldev); + + return mlx5_cmd_modify_active_port(dev0, active_ports); + } + return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); +} + +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx; + int err; + int i; + int j; + + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports); + + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ports[idx] == ldev->v2p_map[idx]) + continue; + err = _mlx5_modify_lag(ldev, ports); + if (err) { + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + return; + } + memcpy(ldev->v2p_map, ports, sizeof(ports)); + + mlx5_lag_print_mapping(dev0, ldev, tracker, + ldev->mode_flags); + break; + } + } + + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !(ldev->mode == MLX5_LAG_MODE_ROCE)) + mlx5_lag_drop_rule_setup(ldev, tracker); +} + +static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, + unsigned long *flags) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + + if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { + if (ldev->ports > 2) + return -EINVAL; + return 0; + } + + if (ldev->ports > 2) + ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; + + set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); + + return 0; +} + +static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + unsigned long *flags) +{ + struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; + + if (mode == MLX5_LAG_MODE_MPESW) + return; + + if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) { + if (ldev->ports > 2) + ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; + set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); + } +} + +static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, + struct lag_tracker *tracker, bool shared_fdb, + unsigned long *flags) +{ + bool roce_lag = mode == MLX5_LAG_MODE_ROCE; + + *flags = 0; + if (shared_fdb) { + set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags); + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); + } + + if (mode == MLX5_LAG_MODE_MPESW) + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); + + if (roce_lag) + return mlx5_lag_set_port_sel_mode_roce(ldev, flags); + + mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags); + return 0; +} + +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) +{ + int port_sel_mode = get_port_sel_mode(mode, flags); + + switch (port_sel_mode) { + case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity"; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash"; + case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw"; + default: return "invalid"; + } +} + +static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_eswitch *master_esw = dev0->priv.eswitch; + int err; + int i; + + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; + + err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, + slave_esw, ldev->ports); + if (err) + goto err; + } + return 0; +err: + for (; i > MLX5_LAG_P1; i--) + mlx5_eswitch_offloads_single_fdb_del_one(master_esw, + ldev->pf[i].dev->priv.eswitch); + return err; +} + +static int mlx5_create_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + unsigned long flags) +{ + bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; + int err; + + if (tracker) + mlx5_lag_print_mapping(dev0, ldev, tracker, flags); + mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", + shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); + + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags); + if (err) { + mlx5_core_err(dev0, + "Failed to create LAG (%d)\n", + err); + return err; + } + + if (shared_fdb) { + err = mlx5_lag_create_single_fdb(ldev); + if (err) + mlx5_core_err(dev0, "Can't enable single FDB mode\n"); + else + mlx5_core_info(dev0, "Operation mode is single FDB\n"); + } + + if (err) { + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } + + return err; +} + +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + bool shared_fdb) +{ + bool roce_lag = mode == MLX5_LAG_MODE_ROCE; + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + unsigned long flags = 0; + int err; + + err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); + if (err) + return err; + + if (mode != MLX5_LAG_MODE_MPESW) { + mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { + err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, + ldev->v2p_map); + if (err) { + mlx5_core_err(dev0, + "Failed to create LAG port selection(%d)\n", + err); + return err; + } + } + } + + err = mlx5_create_lag(ldev, tracker, mode, flags); + if (err) { + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + mlx5_lag_port_sel_destroy(ldev); + if (roce_lag) + mlx5_core_err(dev0, + "Failed to activate RoCE LAG\n"); + else + mlx5_core_err(dev0, + "Failed to activate VF LAG\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + return err; + } + + if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + !roce_lag) + mlx5_lag_drop_rule_setup(ldev, tracker); + + ldev->mode = mode; + ldev->mode_flags = flags; + return 0; +} + +int mlx5_deactivate_lag(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_eswitch *master_esw = dev0->priv.eswitch; + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; + bool roce_lag = __mlx5_lag_is_roce(ldev); + unsigned long flags = ldev->mode_flags; + int err; + int i; + + ldev->mode = MLX5_LAG_MODE_NONE; + ldev->mode_flags = 0; + mlx5_lag_mp_reset(ldev); + + if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) + mlx5_eswitch_offloads_single_fdb_del_one(master_esw, + ldev->pf[i].dev->priv.eswitch); + clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); + } + + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + err = mlx5_cmd_exec_in(dev0, destroy_lag, in); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } else { + mlx5_core_err(dev0, + "Failed to deactivate VF LAG; driver restart required\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + return err; + } + + if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) + mlx5_lag_port_sel_destroy(ldev); + if (mlx5_lag_has_drop_rule(ldev)) + mlx5_lag_drop_rule_cleanup(ldev); + + return 0; +} + +#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 4 +bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ +#ifdef CONFIG_MLX5_ESWITCH + struct mlx5_core_dev *dev; + u8 mode; +#endif + int i; + + for (i = 0; i < ldev->ports; i++) + if (!ldev->pf[i].dev) + return false; + +#ifdef CONFIG_MLX5_ESWITCH + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) + return false; + } + + dev = ldev->pf[MLX5_LAG_P1].dev; + mode = mlx5_eswitch_mode(dev); + for (i = 0; i < ldev->ports; i++) + if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) + return false; + + if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports > MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) + return false; +#else + for (i = 0; i < ldev->ports; i++) + if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) + return false; +#endif + return true; +} + +void mlx5_lag_add_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + + ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(ldev->pf[i].dev); + } +} + +void mlx5_lag_remove_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) { + if (!ldev->pf[i].dev) + continue; + + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + + ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(ldev->pf[i].dev); + } +} + +void mlx5_disable_lag(struct mlx5_lag *ldev) +{ + bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + bool roce_lag; + int err; + int i; + + roce_lag = __mlx5_lag_is_roce(ldev); + + if (shared_fdb) { + mlx5_lag_remove_devices(ldev); + } else if (roce_lag) { + if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + } + for (i = 1; i < ldev->ports; i++) + mlx5_nic_vport_disable_roce(ldev->pf[i].dev); + } + + err = mlx5_deactivate_lag(ldev); + if (err) + return; + + if (shared_fdb || roce_lag) + mlx5_lag_add_devices(ldev); + + if (shared_fdb) + for (i = 0; i < ldev->ports; i++) + if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); +} + +static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev; + int i; + + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (is_mdev_switchdev_mode(dev) && + mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && + MLX5_CAP_GEN(dev, lag_native_fdb_selection) && + MLX5_CAP_ESW(dev, root_ft_on_other_esw) && + mlx5_eswitch_get_npeers(dev->priv.eswitch) == + MLX5_CAP_GEN(dev, num_lag_ports) - 1) + continue; + return false; + } + + dev = ldev->pf[MLX5_LAG_P1].dev; + if (is_mdev_switchdev_mode(dev) && + mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && + mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) && + MLX5_CAP_ESW(dev, esw_shared_ingress_acl) && + mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1) + return true; + + return false; +} + +static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) +{ + bool roce_lag = true; + int i; + + for (i = 0; i < ldev->ports; i++) + roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); + +#ifdef CONFIG_MLX5_ESWITCH + for (i = 0; i < ldev->ports; i++) + roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); +#endif + + return roce_lag; +} + +static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond) +{ + return do_bond && __mlx5_lag_is_active(ldev) && + ldev->mode != MLX5_LAG_MODE_MPESW; +} + +static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) +{ + return !do_bond && __mlx5_lag_is_active(ldev) && + ldev->mode != MLX5_LAG_MODE_MPESW; +} + +static void mlx5_do_bond(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct lag_tracker tracker = { }; + bool do_bond, roce_lag; + int err; + int i; + + if (!mlx5_lag_is_ready(ldev)) { + do_bond = false; + } else { + /* VF LAG is in multipath mode, ignore bond change requests */ + if (mlx5_lag_is_multipath(dev0)) + return; + + tracker = ldev->tracker; + + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + } + + if (do_bond && !__mlx5_lag_is_active(ldev)) { + bool shared_fdb = mlx5_shared_fdb_supported(ldev); + + roce_lag = mlx5_lag_is_roce_lag(ldev); + + if (shared_fdb || roce_lag) + mlx5_lag_remove_devices(ldev); + + err = mlx5_activate_lag(ldev, &tracker, + roce_lag ? MLX5_LAG_MODE_ROCE : + MLX5_LAG_MODE_SRIOV, + shared_fdb); + if (err) { + if (shared_fdb || roce_lag) + mlx5_lag_add_devices(ldev); + + return; + } else if (roce_lag) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + for (i = 1; i < ldev->ports; i++) + mlx5_nic_vport_enable_roce(ldev->pf[i].dev); + } else if (shared_fdb) { + int i; + + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + + for (i = 0; i < ldev->ports; i++) { + err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + if (err) + break; + } + + if (err) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); + mlx5_lag_add_devices(ldev); + for (i = 0; i < ldev->ports; i++) + mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_core_err(dev0, "Failed to enable lag\n"); + return; + } + } + } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) { + mlx5_modify_lag(ldev, &tracker); + } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) { + mlx5_disable_lag(ldev); + } +} + +static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) +{ + queue_delayed_work(ldev->wq, &ldev->bond_work, delay); +} + +static void mlx5_do_bond_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, + bond_work); + int status; + + status = mlx5_dev_list_trylock(); + if (!status) { + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); + mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_do_bond(ldev); + mutex_unlock(&ldev->lock); + mlx5_dev_list_unlock(); +} + +static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *ndev_tmp; + struct netdev_lag_upper_info *lag_upper_info = NULL; + bool is_bonded, is_in_lag, mode_supported; + bool has_inactive = 0; + struct slave *slave; + u8 bond_status = 0; + int num_slaves = 0; + int changed = 0; + int idx; + + if (!netif_is_lag_master(upper)) + return 0; + + if (info->linking) + lag_upper_info = info->upper_info; + + /* The event may still be of interest if the slave does not belong to + * us, but is enslaved to a master which has one or more of our netdevs + * as slaves (e.g., if a new slave is added to a master that bonds two + * of our netdevs, we should unbond). + */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx >= 0) { + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); + bond_status |= (1 << idx); + } + + num_slaves++; + } + rcu_read_unlock(); + + /* None of this lagdev's netdevs are slaves of this master. */ + if (!(bond_status & GENMASK(ldev->ports - 1, 0))) + return 0; + + if (lag_upper_info) { + tracker->tx_type = lag_upper_info->tx_type; + tracker->hash_type = lag_upper_info->hash_type; + } + + tracker->has_inactive = has_inactive; + /* Determine bonding status: + * A device is considered bonded if both its physical ports are slaves + * of the same lag master, and only them. + */ + is_in_lag = num_slaves == ldev->ports && + bond_status == GENMASK(ldev->ports - 1, 0); + + /* Lag mode must be activebackup or hash. */ + mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; + + is_bonded = is_in_lag && mode_supported; + if (tracker->is_bonded != is_bonded) { + tracker->is_bonded = is_bonded; + changed = 1; + } + + if (!is_in_lag) + return changed; + + if (!mlx5_lag_is_ready(ldev)) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + else if (!mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + return changed; +} + +static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + int idx; + + if (!netif_is_lag_port(ndev)) + return 0; + + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); + if (idx < 0) + return 0; + + /* This information is used to determine virtual to physical + * port mapping. + */ + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return 0; + + tracker->netdev_state[idx] = *lag_lower_info; + + return 1; +} + +static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev) +{ + struct net_device *ndev_tmp; + struct slave *slave; + bool has_inactive = 0; + int idx; + + if (!netif_is_lag_master(ndev)) + return 0; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(ndev, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx < 0) + continue; + + slave = bond_slave_get_rcu(ndev_tmp); + if (slave) + has_inactive |= bond_is_slave_inactive(slave); + } + rcu_read_unlock(); + + if (tracker->has_inactive == has_inactive) + return 0; + + tracker->has_inactive = has_inactive; + + return 1; +} + +/* this handler is always registered to netdev events */ +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct lag_tracker tracker; + struct mlx5_lag *ldev; + int changed = 0; + + if (event != NETDEV_CHANGEUPPER && + event != NETDEV_CHANGELOWERSTATE && + event != NETDEV_CHANGEINFODATA) + return NOTIFY_DONE; + + ldev = container_of(this, struct mlx5_lag, nb); + + tracker = ldev->tracker; + + switch (event) { + case NETDEV_CHANGEUPPER: + changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr); + break; + case NETDEV_CHANGELOWERSTATE: + changed = mlx5_handle_changelowerstate_event(ldev, &tracker, + ndev, ptr); + break; + case NETDEV_CHANGEINFODATA: + changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev); + break; + } + + ldev->tracker = tracker; + + if (changed) + mlx5_queue_bond_work(ldev, 0); + + return NOTIFY_DONE; +} + +static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + unsigned int fn = mlx5_get_dev_index(dev); + unsigned long flags; + + if (fn >= ldev->ports) + return; + + spin_lock_irqsave(&lag_lock, flags); + ldev->pf[fn].netdev = netdev; + ldev->tracker.netdev_state[fn].link_up = 0; + ldev->tracker.netdev_state[fn].tx_enabled = 0; + spin_unlock_irqrestore(&lag_lock, flags); +} + +static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, + struct net_device *netdev) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&lag_lock, flags); + for (i = 0; i < ldev->ports; i++) { + if (ldev->pf[i].netdev == netdev) { + ldev->pf[i].netdev = NULL; + break; + } + } + spin_unlock_irqrestore(&lag_lock, flags); +} + +static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + unsigned int fn = mlx5_get_dev_index(dev); + + if (fn >= ldev->ports) + return; + + ldev->pf[fn].dev = dev; + dev->priv.lag = ldev; +} + +static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + int i; + + for (i = 0; i < ldev->ports; i++) + if (ldev->pf[i].dev == dev) + break; + + if (i == ldev->ports) + return; + + ldev->pf[i].dev = NULL; + dev->priv.lag = NULL; +} + +/* Must be called with intf_mutex held */ +static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = NULL; + struct mlx5_core_dev *tmp_dev; + + tmp_dev = mlx5_get_next_phys_dev_lag(dev); + if (tmp_dev) + ldev = mlx5_lag_dev(tmp_dev); + + if (!ldev) { + ldev = mlx5_lag_dev_alloc(dev); + if (!ldev) { + mlx5_core_err(dev, "Failed to alloc lag dev\n"); + return 0; + } + mlx5_ldev_add_mdev(ldev, dev); + return 0; + } + + mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); + return -EAGAIN; + } + mlx5_ldev_get(ldev); + mlx5_ldev_add_mdev(ldev, dev); + mutex_unlock(&ldev->lock); + + return 0; +} + +void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + /* mdev is being removed, might as well remove debugfs + * as early as possible. + */ + mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs); +recheck: + mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mutex_unlock(&ldev->lock); + msleep(100); + goto recheck; + } + mlx5_ldev_remove_mdev(ldev, dev); + mutex_unlock(&ldev->lock); + mlx5_ldev_put(ldev); +} + +void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) +{ + int err; + + if (!mlx5_lag_is_supported(dev)) + return; + +recheck: + mlx5_dev_list_lock(); + err = __mlx5_lag_dev_add_mdev(dev); + mlx5_dev_list_unlock(); + + if (err) { + msleep(100); + goto recheck; + } + mlx5_ldev_add_debugfs(dev); +} + +void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + struct mlx5_lag *ldev; + bool lag_is_active; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mutex_lock(&ldev->lock); + mlx5_ldev_remove_netdev(ldev, netdev); + clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); + + lag_is_active = __mlx5_lag_is_active(ldev); + mutex_unlock(&ldev->lock); + + if (lag_is_active) + mlx5_queue_bond_work(ldev, 0); +} + +void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + struct mlx5_lag *ldev; + int i; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mutex_lock(&ldev->lock); + mlx5_ldev_add_netdev(ldev, dev, netdev); + + for (i = 0; i < ldev->ports; i++) + if (!ldev->pf[i].netdev) + break; + + if (i >= ldev->ports) + set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); + mutex_unlock(&ldev->lock); + mlx5_queue_bond_work(ldev, 0); +} + +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_roce(ldev); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_roce); + +bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_active); + +bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res = 0; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (ldev) + res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_mode_is_hash); + +bool mlx5_lag_is_master(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev) && + dev == ldev->pf[MLX5_LAG_P1].dev; + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_master); + +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_sriov); + +bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); + +void mlx5_lag_disable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mlx5_dev_list_lock(); + mutex_lock(&ldev->lock); + + ldev->mode_changes_in_progress++; + if (__mlx5_lag_is_active(ldev)) + mlx5_disable_lag(ldev); + + mutex_unlock(&ldev->lock); + mlx5_dev_list_unlock(); +} + +void mlx5_lag_enable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mutex_lock(&ldev->lock); + ldev->mode_changes_in_progress--; + mutex_unlock(&ldev->lock); + mlx5_queue_bond_work(ldev, 0); +} + +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) +{ + struct net_device *ndev = NULL; + struct mlx5_lag *ldev; + unsigned long flags; + int i; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; + + if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + for (i = 0; i < ldev->ports; i++) + if (ldev->tracker.netdev_state[i].tx_enabled) + ndev = ldev->pf[i].netdev; + if (!ndev) + ndev = ldev->pf[ldev->ports - 1].netdev; + } else { + ndev = ldev->pf[MLX5_LAG_P1].netdev; + } + if (ndev) + dev_hold(ndev); + +unlock: + spin_unlock_irqrestore(&lag_lock, flags); + + return ndev; +} +EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); + +u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave) +{ + struct mlx5_lag *ldev; + unsigned long flags; + u8 port = 0; + int i; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; + + for (i = 0; i < ldev->ports; i++) { + if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + port = i; + break; + } + } + + port = ldev->v2p_map[port * ldev->buckets]; + +unlock: + spin_unlock_irqrestore(&lag_lock, flags); + return port; +} +EXPORT_SYMBOL(mlx5_lag_get_slave_port); + +u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return 0; + + return ldev->ports; +} +EXPORT_SYMBOL(mlx5_lag_get_num_ports); + +struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i) +{ + struct mlx5_core_dev *peer_dev = NULL; + struct mlx5_lag *ldev; + unsigned long flags; + int idx; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (!ldev) + goto unlock; + + if (*i == ldev->ports) + goto unlock; + for (idx = *i; idx < ldev->ports; idx++) + if (ldev->pf[idx].dev != dev) + break; + + if (idx == ldev->ports) { + *i = idx; + goto unlock; + } + *i = idx + 1; + + peer_dev = ldev->pf[idx].dev; + +unlock: + spin_unlock_irqrestore(&lag_lock, flags); + return peer_dev; +} +EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev); + +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + struct mlx5_core_dev **mdev; + struct mlx5_lag *ldev; + unsigned long flags; + int num_ports; + int ret, i, j; + void *out; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL); + if (!mdev) { + ret = -ENOMEM; + goto free_out; + } + + memset(values, 0, sizeof(*values) * num_counters); + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + if (ldev && __mlx5_lag_is_active(ldev)) { + num_ports = ldev->ports; + for (i = 0; i < ldev->ports; i++) + mdev[i] = ldev->pf[i].dev; + } else { + num_ports = 1; + mdev[MLX5_LAG_P1] = dev; + } + spin_unlock_irqrestore(&lag_lock, flags); + + for (i = 0; i < num_ports; ++i) { + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; + + MLX5_SET(query_cong_statistics_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATISTICS); + ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, + out); + if (ret) + goto free_mdev; + + for (j = 0; j < num_counters; ++j) + values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); + } + +free_mdev: + kvfree(mdev); +free_out: + kvfree(out); + return ret; +} +EXPORT_SYMBOL(mlx5_lag_query_cong_counters); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h new file mode 100644 index 0000000000..481e92f39f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_LAG_H__ +#define __MLX5_LAG_H__ + +#include + +#define MLX5_LAG_MAX_HASH_BUCKETS 16 +#include "mlx5_core.h" +#include "mp.h" +#include "port_sel.h" +#include "mpesw.h" + +enum { + MLX5_LAG_P1, + MLX5_LAG_P2, +}; + +enum { + MLX5_LAG_FLAG_NDEVS_READY, +}; + +enum { + MLX5_LAG_MODE_FLAG_HASH_BASED, + MLX5_LAG_MODE_FLAG_SHARED_FDB, + MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, +}; + +enum mlx5_lag_mode { + MLX5_LAG_MODE_NONE, + MLX5_LAG_MODE_ROCE, + MLX5_LAG_MODE_SRIOV, + MLX5_LAG_MODE_MULTIPATH, + MLX5_LAG_MODE_MPESW, +}; + +struct lag_func { + struct mlx5_core_dev *dev; + struct net_device *netdev; + bool has_drop; +}; + +/* Used for collection of netdev event info. */ +struct lag_tracker { + enum netdev_lag_tx_type tx_type; + struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; + unsigned int is_bonded:1; + unsigned int has_inactive:1; + enum netdev_lag_hash hash_type; +}; + +/* LAG data of a ConnectX card. + * It serves both its phys functions. + */ +struct mlx5_lag { + enum mlx5_lag_mode mode; + unsigned long mode_flags; + unsigned long state_flags; + u8 ports; + u8 buckets; + int mode_changes_in_progress; + u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; + struct kref ref; + struct lag_func pf[MLX5_MAX_PORTS]; + struct lag_tracker tracker; + struct workqueue_struct *wq; + struct delayed_work bond_work; + struct notifier_block nb; + struct lag_mp lag_mp; + struct mlx5_lag_port_sel port_sel; + /* Protect lag fields/state changes */ + struct mutex lock; + struct lag_mpesw lag_mpesw; +}; + +static inline struct mlx5_lag * +mlx5_lag_dev(struct mlx5_core_dev *dev) +{ + return dev->priv.lag; +} + +static inline bool +__mlx5_lag_is_active(struct mlx5_lag *ldev) +{ + return ldev->mode != MLX5_LAG_MODE_NONE; +} + +static inline bool +mlx5_lag_is_ready(struct mlx5_lag *ldev) +{ + return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); +} + +bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker); +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + enum mlx5_lag_mode mode, + bool shared_fdb); +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev); + +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, + u8 *ports, int *num_enabled); + +void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); +void mlx5_ldev_remove_debugfs(struct dentry *dbg); +void mlx5_disable_lag(struct mlx5_lag *ldev); +void mlx5_lag_remove_devices(struct mlx5_lag *ldev); +int mlx5_deactivate_lag(struct mlx5_lag *ldev); +void mlx5_lag_add_devices(struct mlx5_lag *ldev); + +static inline bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) +{ + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) < 2 || + MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) + return false; + return true; +} + +#endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c new file mode 100644 index 0000000000..b1aa494c76 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include +#include "lag/lag.h" +#include "lag/mp.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "lib/events.h" + +static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) +{ + return ldev->mode == MLX5_LAG_MODE_MULTIPATH; +} + +#define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2 +static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) +{ + if (!mlx5_lag_is_ready(ldev)) + return false; + + if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev)) + return false; + + if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS) + return false; + + return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, + ldev->pf[MLX5_LAG_P2].dev); +} + +bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = mlx5_lag_dev(dev); + + return ldev && __mlx5_lag_is_multipath(ldev); +} + +/** + * mlx5_lag_set_port_affinity + * + * @ldev: lag device + * @port: + * 0 - set normal affinity. + * 1 - set affinity to port 1. + * 2 - set affinity to port 2. + * + **/ +static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, + enum mlx5_lag_port_affinity port) +{ + struct lag_tracker tracker = {}; + + if (!__mlx5_lag_is_multipath(ldev)) + return; + + switch (port) { + case MLX5_LAG_NORMAL_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P1].link_up = true; + tracker.netdev_state[MLX5_LAG_P2].link_up = true; + break; + case MLX5_LAG_P1_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P1].link_up = true; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false; + tracker.netdev_state[MLX5_LAG_P2].link_up = false; + break; + case MLX5_LAG_P2_AFFINITY: + tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false; + tracker.netdev_state[MLX5_LAG_P1].link_up = false; + tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; + tracker.netdev_state[MLX5_LAG_P2].link_up = true; + break; + default: + mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + "Invalid affinity port %d", port); + return; + } + + if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events, + MLX5_DEV_EVENT_PORT_AFFINITY, + (void *)0); + + if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events, + MLX5_DEV_EVENT_PORT_AFFINITY, + (void *)0); + + mlx5_modify_lag(ldev, &tracker); +} + +static void mlx5_lag_fib_event_flush(struct notifier_block *nb) +{ + struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); + + flush_workqueue(mp->wq); +} + +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len) +{ + mp->fib.mfi = fi; + mp->fib.priority = fi->fib_priority; + mp->fib.dst = dst; + mp->fib.dst_len = dst_len; +} + +struct mlx5_fib_event_work { + struct work_struct work; + struct mlx5_lag *ldev; + unsigned long event; + union { + struct fib_entry_notifier_info fen_info; + struct fib_nh_notifier_info fnh_info; + }; +}; + +static struct net_device* +mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev, + struct fib_info *fi, + struct net_device *current_dev) +{ + struct net_device *fib_dev; + int i, ldev_idx, nhs; + + nhs = fib_info_num_path(fi); + i = 0; + if (current_dev) { + for (; i < nhs; i++) { + fib_dev = fib_info_nh(fi, i)->fib_nh_dev; + if (fib_dev == current_dev) { + i++; + break; + } + } + } + for (; i < nhs; i++) { + fib_dev = fib_info_nh(fi, i)->fib_nh_dev; + ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev); + if (ldev_idx >= 0) + return ldev->pf[ldev_idx].netdev; + } + + return NULL; +} + +static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, + struct fib_entry_notifier_info *fen_info) +{ + struct net_device *nh_dev0, *nh_dev1; + struct fib_info *fi = fen_info->fi; + struct lag_mp *mp = &ldev->lag_mp; + + /* Handle delete event */ + if (event == FIB_EVENT_ENTRY_DEL) { + /* stop track */ + if (mp->fib.mfi == fi) + mp->fib.mfi = NULL; + return; + } + + /* Handle multipath entry with lower priority value */ + if (mp->fib.mfi && mp->fib.mfi != fi && + (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) && + fi->fib_priority >= mp->fib.priority) + return; + + nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL); + nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0); + + /* Handle add/replace event */ + if (!nh_dev0) { + if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len) + mp->fib.mfi = NULL; + return; + } + + if (nh_dev0 == nh_dev1) { + mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + "Multipath offload doesn't support routes with multiple nexthops of the same device"); + return; + } + + if (!nh_dev1) { + if (__mlx5_lag_is_active(ldev)) { + int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0); + + i++; + mlx5_lag_set_port_affinity(ldev, i); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); + } + + return; + } + + /* First time we see multipath route */ + if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) { + struct lag_tracker tracker; + + tracker = ldev->tracker; + mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false); + } + + mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); +} + +static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, + unsigned long event, + struct fib_nh *fib_nh, + struct fib_info *fi) +{ + struct lag_mp *mp = &ldev->lag_mp; + + /* Check the nh event is related to the route */ + if (!mp->fib.mfi || mp->fib.mfi != fi) + return; + + /* nh added/removed */ + if (event == FIB_EVENT_NH_DEL) { + int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev); + + if (i >= 0) { + i = (i + 1) % 2 + 1; /* peer port */ + mlx5_lag_set_port_affinity(ldev, i); + } + } else if (event == FIB_EVENT_NH_ADD && + fib_info_num_path(fi) == 2) { + mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); + } +} + +static void mlx5_lag_fib_update(struct work_struct *work) +{ + struct mlx5_fib_event_work *fib_work = + container_of(work, struct mlx5_fib_event_work, work); + struct mlx5_lag *ldev = fib_work->ldev; + struct fib_nh *fib_nh; + + /* Protect internal structures from changes */ + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + mlx5_lag_fib_route_event(ldev, fib_work->event, + &fib_work->fen_info); + fib_info_put(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: + case FIB_EVENT_NH_DEL: + fib_nh = fib_work->fnh_info.fib_nh; + mlx5_lag_fib_nexthop_event(ldev, + fib_work->event, + fib_work->fnh_info.fib_nh, + fib_nh->nh_parent); + fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); + break; + } + + rtnl_unlock(); + kfree(fib_work); +} + +static struct mlx5_fib_event_work * +mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event) +{ + struct mlx5_fib_event_work *fib_work; + + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); + if (WARN_ON(!fib_work)) + return NULL; + + INIT_WORK(&fib_work->work, mlx5_lag_fib_update); + fib_work->ldev = ldev; + fib_work->event = event; + + return fib_work; +} + +static int mlx5_lag_fib_event(struct notifier_block *nb, + unsigned long event, + void *ptr) +{ + struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); + struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); + struct fib_notifier_info *info = ptr; + struct mlx5_fib_event_work *fib_work; + struct fib_entry_notifier_info *fen_info; + struct fib_nh_notifier_info *fnh_info; + struct fib_info *fi; + + if (info->family != AF_INET) + return NOTIFY_DONE; + + if (!mlx5_lag_multipath_check_prereq(ldev)) + return NOTIFY_DONE; + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fi = fen_info->fi; + if (fi->nh) + return NOTIFY_DONE; + + fib_work = mlx5_lag_init_fib_work(ldev, event); + if (!fib_work) + return NOTIFY_DONE; + fib_work->fen_info = *fen_info; + /* Take reference on fib_info to prevent it from being + * freed while work is queued. Release it afterwards. + */ + fib_info_hold(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: + case FIB_EVENT_NH_DEL: + fnh_info = container_of(info, struct fib_nh_notifier_info, + info); + fib_work = mlx5_lag_init_fib_work(ldev, event); + if (!fib_work) + return NOTIFY_DONE; + fib_work->fnh_info = *fnh_info; + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; + default: + return NOTIFY_DONE; + } + + queue_work(mp->wq, &fib_work->work); + + return NOTIFY_DONE; +} + +void mlx5_lag_mp_reset(struct mlx5_lag *ldev) +{ + /* Clear mfi, as it might become stale when a route delete event + * has been missed, see mlx5_lag_fib_route_event(). + */ + ldev->lag_mp.fib.mfi = NULL; +} + +int mlx5_lag_mp_init(struct mlx5_lag *ldev) +{ + struct lag_mp *mp = &ldev->lag_mp; + int err; + + /* always clear mfi, as it might become stale when a route delete event + * has been missed + */ + mp->fib.mfi = NULL; + + if (mp->fib_nb.notifier_call) + return 0; + + mp->wq = create_singlethread_workqueue("mlx5_lag_mp"); + if (!mp->wq) + return -ENOMEM; + + mp->fib_nb.notifier_call = mlx5_lag_fib_event; + err = register_fib_notifier(&init_net, &mp->fib_nb, + mlx5_lag_fib_event_flush, NULL); + if (err) { + destroy_workqueue(mp->wq); + mp->fib_nb.notifier_call = NULL; + } + + return err; +} + +void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) +{ + struct lag_mp *mp = &ldev->lag_mp; + + if (!mp->fib_nb.notifier_call) + return; + + unregister_fib_notifier(&init_net, &mp->fib_nb); + destroy_workqueue(mp->wq); + mp->fib_nb.notifier_call = NULL; + mp->fib.mfi = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h new file mode 100644 index 0000000000..056a066da6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_LAG_MP_H__ +#define __MLX5_LAG_MP_H__ + +#include "lag.h" +#include "mlx5_core.h" + +enum mlx5_lag_port_affinity { + MLX5_LAG_NORMAL_AFFINITY, + MLX5_LAG_P1_AFFINITY, + MLX5_LAG_P2_AFFINITY, +}; + +struct lag_mp { + struct notifier_block fib_nb; + struct { + const void *mfi; /* used in tracking fib events */ + u32 priority; + u32 dst; + int dst_len; + } fib; + struct workqueue_struct *wq; +}; + +#ifdef CONFIG_MLX5_ESWITCH + +void mlx5_lag_mp_reset(struct mlx5_lag *ldev); +int mlx5_lag_mp_init(struct mlx5_lag *ldev); +void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev); +bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {}; +static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; } +static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {} +static inline bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; } + +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_LAG_MP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c new file mode 100644 index 0000000000..4bf1539152 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include +#include "lag/lag.h" +#include "eswitch.h" +#include "esw/acl/ofld.h" +#include "lib/events.h" + +static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev; + struct mlx5_eswitch *esw; + u32 pf_metadata; + int i; + + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + esw = dev->priv.eswitch; + pf_metadata = ldev->lag_mpesw.pf_metadata[i]; + if (!pf_metadata) + continue; + mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK, 0); + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, + (void *)0); + mlx5_esw_match_metadata_free(esw, pf_metadata); + ldev->lag_mpesw.pf_metadata[i] = 0; + } +} + +static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev; + struct mlx5_eswitch *esw; + u32 pf_metadata; + int i, err; + + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + esw = dev->priv.eswitch; + pf_metadata = mlx5_esw_match_metadata_alloc(esw); + if (!pf_metadata) { + err = -ENOSPC; + goto err_metadata; + } + + ldev->lag_mpesw.pf_metadata[i] = pf_metadata; + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK, + pf_metadata); + if (err) + goto err_metadata; + } + + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, + (void *)0); + } + + return 0; + +err_metadata: + mlx5_mpesw_metadata_cleanup(ldev); + return err; +} + +#define MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS 2 +static int enable_mpesw(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + int err; + + if (ldev->mode != MLX5_LAG_MODE_NONE) + return -EINVAL; + + if (ldev->ports > MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS) + return -EOPNOTSUPP; + + if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || + !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || + !mlx5_lag_check_prereq(ldev)) + return -EOPNOTSUPP; + + err = mlx5_mpesw_metadata_set(ldev); + if (err) + return err; + + mlx5_lag_remove_devices(ldev); + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, true); + if (err) { + mlx5_core_warn(dev0, "Failed to create LAG in MPESW mode (%d)\n", err); + goto err_add_devices; + } + + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + if (err) + goto err_rescan_drivers; + + return 0; + +err_rescan_drivers: + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); +err_add_devices: + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + mlx5_mpesw_metadata_cleanup(ldev); + return err; +} + +static void disable_mpesw(struct mlx5_lag *ldev) +{ + if (ldev->mode == MLX5_LAG_MODE_MPESW) { + mlx5_mpesw_metadata_cleanup(ldev); + mlx5_disable_lag(ldev); + } +} + +static void mlx5_mpesw_work(struct work_struct *work) +{ + struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); + struct mlx5_lag *ldev = mpesww->lag; + + mlx5_dev_list_lock(); + mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mpesww->result = -EAGAIN; + goto unlock; + } + + if (mpesww->op == MLX5_MPESW_OP_ENABLE) + mpesww->result = enable_mpesw(ldev); + else if (mpesww->op == MLX5_MPESW_OP_DISABLE) + disable_mpesw(ldev); +unlock: + mutex_unlock(&ldev->lock); + mlx5_dev_list_unlock(); + complete(&mpesww->comp); +} + +static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev, + enum mpesw_op op) +{ + struct mlx5_lag *ldev = mlx5_lag_dev(dev); + struct mlx5_mpesw_work_st *work; + int err = 0; + + if (!ldev) + return 0; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + INIT_WORK(&work->work, mlx5_mpesw_work); + init_completion(&work->comp); + work->op = op; + work->lag = ldev; + + if (!queue_work(ldev->wq, &work->work)) { + mlx5_core_warn(dev, "failed to queue mpesw work\n"); + err = -EINVAL; + goto out; + } + wait_for_completion(&work->comp); + err = work->result; +out: + kfree(work); + return err; +} + +void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev) +{ + mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); +} + +int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev) +{ + return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); +} + +int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_lag *ldev = mlx5_lag_dev(mdev); + + if (!netif_is_bond_master(out_dev) || !ldev) + return 0; + + if (ldev->mode != MLX5_LAG_MODE_MPESW) + return 0; + + NL_SET_ERR_MSG_MOD(extack, "can't forward to bond in mpesw mode"); + return -EOPNOTSUPP; +} + +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = mlx5_lag_dev(dev); + + return ldev && ldev->mode == MLX5_LAG_MODE_MPESW; +} +EXPORT_SYMBOL(mlx5_lag_is_mpesw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h new file mode 100644 index 0000000000..02520f27a0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LAG_MPESW_H__ +#define __MLX5_LAG_MPESW_H__ + +#include "lag.h" +#include "mlx5_core.h" + +struct lag_mpesw { + struct work_struct mpesw_work; + u32 pf_metadata[MLX5_MAX_PORTS]; +}; + +enum mpesw_op { + MLX5_MPESW_OP_ENABLE, + MLX5_MPESW_OP_DISABLE, +}; + +struct mlx5_mpesw_work_st { + struct work_struct work; + struct mlx5_lag *lag; + enum mpesw_op op; + struct completion comp; + int result; +}; + +int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, + struct net_device *out_dev, + struct netlink_ext_ack *extack); +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); +void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev); +int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev); + +#endif /* __MLX5_LAG_MPESW_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c new file mode 100644 index 0000000000..7d9bbb494d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -0,0 +1,637 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include +#include "lag.h" + +enum { + MLX5_LAG_FT_LEVEL_TTC, + MLX5_LAG_FT_LEVEL_INNER_TTC, + MLX5_LAG_FT_LEVEL_DEFINER, +}; + +static struct mlx5_flow_group * +mlx5_create_hash_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_definer *definer, + u8 rules) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_definer_id, + mlx5_get_match_definer_id(definer)); + MLX5_SET(create_flow_group_in, in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1); + MLX5_SET(create_flow_group_in, in, group_type, + MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT); + + fg = mlx5_create_flow_group(ft, in); + kvfree(in); + return fg; +} + +static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, + struct mlx5_lag_definer *lag_definer, + u8 *ports) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_namespace *ns; + int err, i; + int idx; + int j; + + ft_attr.max_fte = ldev->ports * ldev->buckets; + ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL); + if (!ns) { + mlx5_core_warn(dev, "Failed to get port selection namespace\n"); + return -EOPNOTSUPP; + } + + lag_definer->ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(lag_definer->ft)) { + mlx5_core_warn(dev, "Failed to create port selection table\n"); + return PTR_ERR(lag_definer->ft); + } + + lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft, + lag_definer->definer, + ft_attr.max_fte); + if (IS_ERR(lag_definer->fg)) { + err = PTR_ERR(lag_definer->fg); + goto destroy_ft; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + flow_act.flags |= FLOW_ACT_NO_APPEND; + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + u8 affinity; + + idx = i * ldev->buckets + j; + affinity = ports[idx]; + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev, + vhca_id); + lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft, + NULL, &flow_act, + &dest, 1); + if (IS_ERR(lag_definer->rules[idx])) { + err = PTR_ERR(lag_definer->rules[idx]); + while (i--) + while (j--) + mlx5_del_flow_rules(lag_definer->rules[idx]); + goto destroy_fg; + } + } + } + + return 0; + +destroy_fg: + mlx5_destroy_flow_group(lag_definer->fg); +destroy_ft: + mlx5_destroy_flow_table(lag_definer->ft); + return err; +} + +static int mlx5_lag_set_definer_inner(u32 *match_definer_mask, + enum mlx5_traffic_types tt) +{ + int format_id; + u8 *ipv6; + + switch (tt) { + case MLX5_TT_IPV4_UDP: + case MLX5_TT_IPV4_TCP: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l4_dport); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_dest_addr); + break; + case MLX5_TT_IPV4: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l3_type); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_ip_dest_addr); + break; + case MLX5_TT_IPV6_TCP: + case MLX5_TT_IPV6_UDP: + format_id = 31; + MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask, + inner_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_31, match_definer_mask, + inner_l4_dport); + ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask, + inner_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_31, match_definer_mask, + inner_ip_src_addr); + memset(ipv6, 0xff, 16); + break; + case MLX5_TT_IPV6: + format_id = 32; + ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask, + inner_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_32, match_definer_mask, + inner_ip_src_addr); + memset(ipv6, 0xff, 16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_32, match_definer_mask, + inner_smac_15_0); + break; + default: + format_id = 23; + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_l3_type); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_23, match_definer_mask, + inner_smac_15_0); + break; + } + + return format_id; +} + +static int mlx5_lag_set_definer(u32 *match_definer_mask, + enum mlx5_traffic_types tt, bool tunnel, + enum netdev_lag_hash hash) +{ + int format_id; + u8 *ipv6; + + if (tunnel) + return mlx5_lag_set_definer_inner(match_definer_mask, tt); + + switch (tt) { + case MLX5_TT_IPV4_UDP: + case MLX5_TT_IPV4_TCP: + format_id = 22; + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l4_dport); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_dest_addr); + break; + case MLX5_TT_IPV4: + format_id = 22; + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_l3_type); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_smac_15_0); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_src_addr); + MLX5_SET_TO_ONES(match_definer_format_22, match_definer_mask, + outer_ip_dest_addr); + break; + case MLX5_TT_IPV6_TCP: + case MLX5_TT_IPV6_UDP: + format_id = 29; + MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask, + outer_l4_sport); + MLX5_SET_TO_ONES(match_definer_format_29, match_definer_mask, + outer_l4_dport); + ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask, + outer_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_29, match_definer_mask, + outer_ip_src_addr); + memset(ipv6, 0xff, 16); + break; + case MLX5_TT_IPV6: + format_id = 30; + ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask, + outer_ip_dest_addr); + memset(ipv6, 0xff, 16); + ipv6 = MLX5_ADDR_OF(match_definer_format_30, match_definer_mask, + outer_ip_src_addr); + memset(ipv6, 0xff, 16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_dmac_15_0); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_30, match_definer_mask, + outer_smac_15_0); + break; + default: + format_id = 0; + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_smac_47_16); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_smac_15_0); + + if (hash == NETDEV_LAG_HASH_VLAN_SRCMAC) { + MLX5_SET_TO_ONES(match_definer_format_0, + match_definer_mask, + outer_first_vlan_vid); + break; + } + + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_ethertype); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_dmac_47_16); + MLX5_SET_TO_ONES(match_definer_format_0, match_definer_mask, + outer_dmac_15_0); + break; + } + + return format_id; +} + +static struct mlx5_lag_definer * +mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, + enum mlx5_traffic_types tt, bool tunnel, u8 *ports) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_definer *lag_definer; + u32 *match_definer_mask; + int format_id, err; + + lag_definer = kzalloc(sizeof(*lag_definer), GFP_KERNEL); + if (!lag_definer) + return ERR_PTR(-ENOMEM); + + match_definer_mask = kvzalloc(MLX5_FLD_SZ_BYTES(match_definer, + match_mask), + GFP_KERNEL); + if (!match_definer_mask) { + err = -ENOMEM; + goto free_lag_definer; + } + + format_id = mlx5_lag_set_definer(match_definer_mask, tt, tunnel, hash); + lag_definer->definer = + mlx5_create_match_definer(dev, MLX5_FLOW_NAMESPACE_PORT_SEL, + format_id, match_definer_mask); + if (IS_ERR(lag_definer->definer)) { + err = PTR_ERR(lag_definer->definer); + goto free_mask; + } + + err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports); + if (err) + goto destroy_match_definer; + + kvfree(match_definer_mask); + + return lag_definer; + +destroy_match_definer: + mlx5_destroy_match_definer(dev, lag_definer->definer); +free_mask: + kvfree(match_definer_mask); +free_lag_definer: + kfree(lag_definer); + return ERR_PTR(err); +} + +static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, + struct mlx5_lag_definer *lag_definer) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int idx; + int i; + int j; + + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + mlx5_del_flow_rules(lag_definer->rules[idx]); + } + } + mlx5_destroy_flow_group(lag_definer->fg); + mlx5_destroy_flow_table(lag_definer->ft); + mlx5_destroy_match_definer(dev, lag_definer->definer); + kfree(lag_definer); +} + +static void mlx5_lag_destroy_definers(struct mlx5_lag *ldev) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int tt; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + if (port_sel->outer.definers[tt]) + mlx5_lag_destroy_definer(ldev, + port_sel->outer.definers[tt]); + if (port_sel->inner.definers[tt]) + mlx5_lag_destroy_definer(ldev, + port_sel->inner.definers[tt]); + } +} + +static int mlx5_lag_create_definers(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, + u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_lag_definer *lag_definer; + int tt, err; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt, + false, ports); + if (IS_ERR(lag_definer)) { + err = PTR_ERR(lag_definer); + goto destroy_definers; + } + port_sel->outer.definers[tt] = lag_definer; + + if (!port_sel->tunnel) + continue; + + lag_definer = + mlx5_lag_create_definer(ldev, hash_type, tt, + true, ports); + if (IS_ERR(lag_definer)) { + err = PTR_ERR(lag_definer); + goto destroy_definers; + } + port_sel->inner.definers[tt] = lag_definer; + } + + return 0; + +destroy_definers: + mlx5_lag_destroy_definers(ldev); + return err; +} + +static void set_tt_map(struct mlx5_lag_port_sel *port_sel, + enum netdev_lag_hash hash) +{ + port_sel->tunnel = false; + + switch (hash) { + case NETDEV_LAG_HASH_E34: + port_sel->tunnel = true; + fallthrough; + case NETDEV_LAG_HASH_L34: + set_bit(MLX5_TT_IPV4_TCP, port_sel->tt_map); + set_bit(MLX5_TT_IPV4_UDP, port_sel->tt_map); + set_bit(MLX5_TT_IPV6_TCP, port_sel->tt_map); + set_bit(MLX5_TT_IPV6_UDP, port_sel->tt_map); + set_bit(MLX5_TT_IPV4, port_sel->tt_map); + set_bit(MLX5_TT_IPV6, port_sel->tt_map); + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + case NETDEV_LAG_HASH_E23: + port_sel->tunnel = true; + fallthrough; + case NETDEV_LAG_HASH_L23: + set_bit(MLX5_TT_IPV4, port_sel->tt_map); + set_bit(MLX5_TT_IPV6, port_sel->tt_map); + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + default: + set_bit(MLX5_TT_ANY, port_sel->tt_map); + break; + } +} + +#define SET_IGNORE_DESTS_BITS(tt_map, dests) \ + do { \ + int idx; \ + \ + for_each_clear_bit(idx, tt_map, MLX5_NUM_TT) \ + set_bit(idx, dests); \ + } while (0) + +static void mlx5_lag_set_inner_ttc_params(struct mlx5_lag *ldev, + struct ttc_params *ttc_params) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_flow_table_attr *ft_attr; + int tt; + + ttc_params->ns = mlx5_get_flow_namespace(dev, + MLX5_FLOW_NAMESPACE_PORT_SEL); + ft_attr = &ttc_params->ft_attr; + ft_attr->level = MLX5_LAG_FT_LEVEL_INNER_TTC; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + ttc_params->dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->dests[tt].ft = port_sel->inner.definers[tt]->ft; + } + SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests); +} + +static void mlx5_lag_set_outer_ttc_params(struct mlx5_lag *ldev, + struct ttc_params *ttc_params) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct mlx5_flow_table_attr *ft_attr; + int tt; + + ttc_params->ns = mlx5_get_flow_namespace(dev, + MLX5_FLOW_NAMESPACE_PORT_SEL); + ft_attr = &ttc_params->ft_attr; + ft_attr->level = MLX5_LAG_FT_LEVEL_TTC; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + ttc_params->dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->dests[tt].ft = port_sel->outer.definers[tt]->ft; + } + SET_IGNORE_DESTS_BITS(port_sel->tt_map, ttc_params->ignore_dests); + + ttc_params->inner_ttc = port_sel->tunnel; + if (!port_sel->tunnel) + return; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + ttc_params->tunnel_dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->tunnel_dests[tt].ft = + mlx5_get_ttc_flow_table(port_sel->inner.ttc); + } +} + +static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct ttc_params ttc_params = {}; + + mlx5_lag_set_outer_ttc_params(ldev, &ttc_params); + port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params); + if (IS_ERR(port_sel->outer.ttc)) + return PTR_ERR(port_sel->outer.ttc); + + return 0; +} + +static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + struct ttc_params ttc_params = {}; + + mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); + port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); + if (IS_ERR(port_sel->inner.ttc)) + return PTR_ERR(port_sel->inner.ttc); + + return 0; +} + +int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + + set_tt_map(port_sel, hash_type); + err = mlx5_lag_create_definers(ldev, hash_type, ports); + if (err) + return err; + + if (port_sel->tunnel) { + err = mlx5_lag_create_inner_ttc_table(ldev); + if (err) + goto destroy_definers; + } + + err = mlx5_lag_create_ttc_table(ldev); + if (err) + goto destroy_inner; + + return 0; + +destroy_inner: + if (port_sel->tunnel) + mlx5_destroy_ttc_table(port_sel->inner.ttc); +destroy_definers: + mlx5_lag_destroy_definers(ldev); + return err; +} + +static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer *def, + u8 *ports) +{ + struct mlx5_flow_destination dest = {}; + int idx; + int err; + int i; + int j; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + + for (i = 0; i < ldev->ports; i++) { + for (j = 0; j < ldev->buckets; j++) { + idx = i * ldev->buckets + j; + if (ldev->v2p_map[idx] == ports[idx]) + continue; + + dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev, + vhca_id); + err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL); + if (err) + return err; + } + } + + return 0; +} + +static int +mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, + struct mlx5_lag_definer **definers, + u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + int tt; + + for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) { + err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports); + if (err) + return err; + } + + return 0; +} + +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + int err; + + err = mlx5_lag_modify_definers_destinations(ldev, + port_sel->outer.definers, + ports); + if (err) + return err; + + if (!port_sel->tunnel) + return 0; + + return mlx5_lag_modify_definers_destinations(ldev, + port_sel->inner.definers, + ports); +} + +void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) +{ + struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; + + mlx5_destroy_ttc_table(port_sel->outer.ttc); + if (port_sel->tunnel) + mlx5_destroy_ttc_table(port_sel->inner.ttc); + mlx5_lag_destroy_definers(ldev); + memset(port_sel, 0, sizeof(*port_sel)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h new file mode 100644 index 0000000000..5ec3af2a3e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_LAG_FS_H__ +#define __MLX5_LAG_FS_H__ + +#include "lib/fs_ttc.h" + +struct mlx5_lag_definer { + struct mlx5_flow_definer *definer; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + /* Each port has ldev->buckets number of rules and they are arrange in + * [port * buckets .. port * buckets + buckets) locations + */ + struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS]; +}; + +struct mlx5_lag_ttc { + struct mlx5_ttc_table *ttc; + struct mlx5_lag_definer *definers[MLX5_NUM_TT]; +}; + +struct mlx5_lag_port_sel { + DECLARE_BITMAP(tt_map, MLX5_NUM_TT); + bool tunnel; + struct mlx5_lag_ttc outer; + struct mlx5_lag_ttc inner; +}; + +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports); +void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev); +int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, u8 *ports); + +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, + enum netdev_lag_hash hash_type, + u8 *ports) +{ + return 0; +} + +static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports) +{ + return 0; +} + +static inline void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) {} +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_LAG_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c new file mode 100644 index 0000000000..58bd749b5e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include +#include "clock.h" +#include "aso.h" +#include "wq.h" + +struct mlx5_aso_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + + /* data path - accessed per napi poll */ + struct mlx5_core_cq mcq; + + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5_aso { + /* data path */ + u16 cc; + u16 pc; + + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5_aso_cq cq; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + +} ____cacheline_aligned_in_smp; + +static void mlx5_aso_free_cq(struct mlx5_aso_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5_aso_alloc_cq(struct mlx5_core_dev *mdev, int numa_node, + void *cqc_data, struct mlx5_aso_cq *cq) +{ + struct mlx5_core_cq *mcq = &cq->mcq; + struct mlx5_wq_param param; + int err; + u32 i; + + param.buf_numa_node = numa_node; + param.db_numa_node = numa_node; + + err = mlx5_cqwq_create(mdev, ¶m, cqc_data, &cq->wq, &cq->wq_ctrl); + if (err) + return err; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->mdev = mdev; + + return 0; +} + +static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) +{ + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_core_dev *mdev = cq->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + void *in, *cqc; + int inlen, eqn; + int err; + + err = mlx5_comp_eqn_get(mdev, 0, &eqn); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc)); + + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); + + kvfree(in); + + return err; +} + +static void mlx5_aso_destroy_cq(struct mlx5_aso_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5_aso_create_cq(struct mlx5_core_dev *mdev, int numa_node, + struct mlx5_aso_cq *cq) +{ + void *cqc_data; + int err; + + cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL); + if (!cqc_data) + return -ENOMEM; + + MLX5_SET(cqc, cqc_data, log_cq_size, 1); + MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc_data, cqe_sz, CQE_STRIDE_128_PAD); + + err = mlx5_aso_alloc_cq(mdev, numa_node, cqc_data, cq); + if (err) { + mlx5_core_err(mdev, "Failed to alloc aso wq cq, err=%d\n", err); + goto err_out; + } + + err = create_aso_cq(cq, cqc_data); + if (err) { + mlx5_core_err(mdev, "Failed to create aso wq cq, err=%d\n", err); + goto err_free_cq; + } + + kvfree(cqc_data); + return 0; + +err_free_cq: + mlx5_aso_free_cq(cq); +err_out: + kvfree(cqc_data); + return err; +} + +static int mlx5_aso_alloc_sq(struct mlx5_core_dev *mdev, int numa_node, + void *sqc_data, struct mlx5_aso *sq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq); + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5_wq_param param; + int err; + + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + + param.db_numa_node = numa_node; + param.buf_numa_node = numa_node; + err = mlx5_wq_cyc_create(mdev, ¶m, sqc_wq, wq, &sq->wq_ctrl); + if (err) + return err; + wq->db = &wq->db[MLX5_SND_DBR]; + + return 0; +} + +static int create_aso_sq(struct mlx5_core_dev *mdev, int pdn, + void *sqc_data, struct mlx5_aso *sq) +{ + void *in, *sqc, *wq; + int inlen, err; + u8 ts_format; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc)); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + ts_format = mlx5_is_real_time_sq(mdev) ? + MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + MLX5_SET(sqc, sqc, ts_format, ts_format); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_frag_array(&sq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + + kvfree(in); + + return err; +} + +static int mlx5_aso_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn) +{ + void *in, *sqc; + int inlen, err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST); + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); + + err = mlx5_core_modify_sq(mdev, sqn, in); + + kvfree(in); + + return err; +} + +static int mlx5_aso_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn, + void *sqc_data, struct mlx5_aso *sq) +{ + int err; + + err = create_aso_sq(mdev, pdn, sqc_data, sq); + if (err) + return err; + + err = mlx5_aso_set_sq_rdy(mdev, sq->sqn); + if (err) + mlx5_core_destroy_sq(mdev, sq->sqn); + + return err; +} + +static void mlx5_aso_free_sq(struct mlx5_aso *sq) +{ + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5_aso_destroy_sq(struct mlx5_aso *sq) +{ + mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn); + mlx5_aso_free_sq(sq); +} + +static int mlx5_aso_create_sq(struct mlx5_core_dev *mdev, int numa_node, + u32 pdn, struct mlx5_aso *sq) +{ + void *sqc_data, *wq; + int err; + + sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL); + if (!sqc_data) + return -ENOMEM; + + wq = MLX5_ADDR_OF(sqc, sqc_data, wq); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, pdn); + MLX5_SET(wq, wq, log_wq_sz, 1); + + err = mlx5_aso_alloc_sq(mdev, numa_node, sqc_data, sq); + if (err) { + mlx5_core_err(mdev, "Failed to alloc aso wq sq, err=%d\n", err); + goto err_out; + } + + err = mlx5_aso_create_sq_rdy(mdev, pdn, sqc_data, sq); + if (err) { + mlx5_core_err(mdev, "Failed to open aso wq sq, err=%d\n", err); + goto err_free_asosq; + } + + mlx5_core_dbg(mdev, "aso sq->sqn = 0x%x\n", sq->sqn); + + kvfree(sqc_data); + return 0; + +err_free_asosq: + mlx5_aso_free_sq(sq); +err_out: + kvfree(sqc_data); + return err; +} + +struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn) +{ + int numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + struct mlx5_aso *aso; + int err; + + aso = kzalloc(sizeof(*aso), GFP_KERNEL); + if (!aso) + return ERR_PTR(-ENOMEM); + + err = mlx5_aso_create_cq(mdev, numa_node, &aso->cq); + if (err) + goto err_cq; + + err = mlx5_aso_create_sq(mdev, numa_node, pdn, aso); + if (err) + goto err_sq; + + return aso; + +err_sq: + mlx5_aso_destroy_cq(&aso->cq); +err_cq: + kfree(aso); + return ERR_PTR(err); +} + +void mlx5_aso_destroy(struct mlx5_aso *aso) +{ + mlx5_aso_destroy_sq(aso); + mlx5_aso_destroy_cq(&aso->cq); + kfree(aso); +} + +void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt, + struct mlx5_aso_wqe *aso_wqe, + u32 obj_id, u32 opc_mode) +{ + struct mlx5_wqe_ctrl_seg *cseg = &aso_wqe->ctrl; + + cseg->opmod_idx_opcode = cpu_to_be32((opc_mode << MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT) | + (aso->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_ACCESS_ASO); + cseg->qpn_ds = cpu_to_be32((aso->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->general_id = cpu_to_be32(obj_id); +} + +struct mlx5_aso_wqe *mlx5_aso_get_wqe(struct mlx5_aso *aso) +{ + struct mlx5_aso_wqe *wqe; + u16 pi; + + pi = mlx5_wq_cyc_ctr2ix(&aso->wq, aso->pc); + wqe = mlx5_wq_cyc_get_wqe(&aso->wq, pi); + memset(wqe, 0, sizeof(*wqe)); + return wqe; +} + +void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, + struct mlx5_wqe_ctrl_seg *doorbell_cseg) +{ + doorbell_cseg->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + if (with_data) + aso->pc += MLX5_ASO_WQEBBS_DATA; + else + aso->pc += MLX5_ASO_WQEBBS; + *aso->wq.db = cpu_to_be32(aso->pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)doorbell_cseg, aso->uar_map); + + /* Ensure doorbell is written on uar_page before poll_cq */ + WRITE_ONCE(doorbell_cseg, NULL); +} + +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data) +{ + struct mlx5_aso_cq *cq = &aso->cq; + struct mlx5_cqe64 *cqe; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return -ETIMEDOUT; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + mlx5_cqwq_pop(&cq->wq); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + struct mlx5_err_cqe *err_cqe; + + mlx5_core_err(cq->mdev, "Bad OP in ASOSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + + err_cqe = (struct mlx5_err_cqe *)cqe; + mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n", + err_cqe->vendor_err_synd); + mlx5_core_err(cq->mdev, "syndrome=%x\n", + err_cqe->syndrome); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, + 16, 1, err_cqe, + sizeof(*err_cqe), false); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + if (with_data) + aso->cc += MLX5_ASO_WQEBBS_DATA; + else + aso->cc += MLX5_ASO_WQEBBS; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h new file mode 100644 index 0000000000..afb078bbb8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LIB_ASO_H__ +#define __MLX5_LIB_ASO_H__ + +#include +#include "mlx5_core.h" + +#define MLX5_ASO_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_BB)) +#define MLX5_ASO_WQEBBS_DATA \ + (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_BB)) +#define ASO_CTRL_READ_EN BIT(0) +#define MLX5_WQE_CTRL_WQE_OPC_MOD_SHIFT 24 +#define MLX5_MACSEC_ASO_DS_CNT (DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe), MLX5_SEND_WQE_DS)) + +#define ASO_CTRL_READ_EN BIT(0) +struct mlx5_wqe_aso_ctrl_seg { + __be32 va_h; + __be32 va_l; /* include read_enable */ + __be32 l_key; + u8 data_mask_mode; + u8 condition_1_0_operand; + u8 condition_1_0_offset; + u8 data_offset_condition_operand; + __be32 condition_0_data; + __be32 condition_0_mask; + __be32 condition_1_data; + __be32 condition_1_mask; + __be64 bitwise_data; + __be64 data_mask; +}; + +struct mlx5_wqe_aso_data_seg { + __be32 bytewise_data[16]; +}; + +struct mlx5_aso_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_aso_ctrl_seg aso_ctrl; +}; + +struct mlx5_aso_wqe_data { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_aso_ctrl_seg aso_ctrl; + struct mlx5_wqe_aso_data_seg aso_data; +}; + +enum { + MLX5_ASO_LOGICAL_AND, + MLX5_ASO_LOGICAL_OR, +}; + +enum { + MLX5_ASO_ALWAYS_FALSE, + MLX5_ASO_ALWAYS_TRUE, + MLX5_ASO_EQUAL, + MLX5_ASO_NOT_EQUAL, + MLX5_ASO_GREATER_OR_EQUAL, + MLX5_ASO_LESSER_OR_EQUAL, + MLX5_ASO_LESSER, + MLX5_ASO_GREATER, + MLX5_ASO_CYCLIC_GREATER, + MLX5_ASO_CYCLIC_LESSER, +}; + +enum { + MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT, + MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE, + MLX5_ASO_DATA_MASK_MODE_CALCULATED_64BYTE, +}; + +enum { + MLX5_ACCESS_ASO_OPC_MOD_IPSEC = 0x0, + MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER = 0x2, + MLX5_ACCESS_ASO_OPC_MOD_MACSEC = 0x5, +}; + +struct mlx5_aso; + +struct mlx5_aso_wqe *mlx5_aso_get_wqe(struct mlx5_aso *aso); +void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt, + struct mlx5_aso_wqe *aso_wqe, + u32 obj_id, u32 opc_mode); +void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, + struct mlx5_wqe_ctrl_seg *doorbell_cseg); +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data); + +struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn); +void mlx5_aso_destroy(struct mlx5_aso *aso); +#endif /* __MLX5_LIB_ASO_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c new file mode 100644 index 0000000000..0c83ef1742 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -0,0 +1,1088 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "lib/eq.h" +#include "en.h" +#include "clock.h" + +enum { + MLX5_PIN_MODE_IN = 0x0, + MLX5_PIN_MODE_OUT = 0x1, +}; + +enum { + MLX5_OUT_PATTERN_PULSE = 0x0, + MLX5_OUT_PATTERN_PERIODIC = 0x1, +}; + +enum { + MLX5_EVENT_MODE_DISABLE = 0x0, + MLX5_EVENT_MODE_REPETETIVE = 0x1, + MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2, +}; + +enum { + MLX5_MTPPS_FS_ENABLE = BIT(0x0), + MLX5_MTPPS_FS_PATTERN = BIT(0x2), + MLX5_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), + MLX5_MTPPS_FS_NPPS_PERIOD = BIT(0x9), + MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS = BIT(0xa), +}; + +enum { + MLX5_MTUTC_OPERATION_ADJUST_TIME_MIN = S16_MIN, + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX = S16_MAX, + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MIN = -200000, + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000, +}; + +static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) +{ + return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev)); +} + +static bool mlx5_npps_real_time_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5_real_time_mode(mdev) && + MLX5_CAP_MCAM_FEATURE(mdev, npps_period) && + MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns)); +} + +static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); +} + +static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) +{ + /* Optimal shift constant leads to corrections above just 1 scaled ppm. + * + * Two sets of equations are needed to derive the optimal shift + * constant for the cyclecounter. + * + * dev_freq_khz * 1000 / 2^shift_constant = 1 scaled_ppm + * ppb = scaled_ppm * 1000 / 2^16 + * + * Using the two equations together + * + * dev_freq_khz * 1000 / 1 scaled_ppm = 2^shift_constant + * dev_freq_khz * 2^16 / 1 ppb = 2^shift_constant + * dev_freq_khz = 2^(shift_constant - 16) + * + * then yields + * + * shift_constant = ilog2(dev_freq_khz) + 16 + */ + + return min(ilog2(dev_freq_khz) + 16, + ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz)); +} + +static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; +} + +static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) +{ + s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info); + + if (delta < -max || delta > max) + return false; + + return true; +} + +static int mlx5_set_mtutc(struct mlx5_core_dev *dev, u32 *mtutc, u32 size) +{ + u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + + if (!MLX5_CAP_MCAM_REG(dev, mtutc)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(dev, mtutc, size, out, sizeof(out), + MLX5_REG_MTUTC, 0, 1); +} + +static u64 mlx5_read_time(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts, + bool real_time) +{ + u32 timer_h, timer_h1, timer_l; + + timer_h = ioread32be(real_time ? &dev->iseg->real_time_h : + &dev->iseg->internal_timer_h); + ptp_read_system_prets(sts); + timer_l = ioread32be(real_time ? &dev->iseg->real_time_l : + &dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + timer_h1 = ioread32be(real_time ? &dev->iseg->real_time_h : + &dev->iseg->internal_timer_h); + if (timer_h != timer_h1) { + /* wrap around */ + ptp_read_system_prets(sts); + timer_l = ioread32be(real_time ? &dev->iseg->real_time_l : + &dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + } + + return real_time ? REAL_TIME_TO_NS(timer_h1, timer_l) : + (u64)timer_l | (u64)timer_h1 << 32; +} + +static u64 read_internal_timer(const struct cyclecounter *cc) +{ + struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles); + struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + + return mlx5_read_time(mdev, NULL, false) & cc->mask; +} + +static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_clock *clock = &mdev->clock; + struct mlx5_timer *timer; + u32 sign; + + if (!clock_info) + return; + + sign = smp_load_acquire(&clock_info->sign); + smp_store_mb(clock_info->sign, + sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING); + + timer = &clock->timer; + clock_info->cycles = timer->tc.cycle_last; + clock_info->mult = timer->cycles.mult; + clock_info->nsec = timer->tc.nsec; + clock_info->frac = timer->tc.frac; + + smp_store_release(&clock_info->sign, + sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2); +} + +static void mlx5_pps_out(struct work_struct *work) +{ + struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, + out_work); + struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, + pps_info); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + unsigned long flags; + int i; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + u64 tstart; + + write_seqlock_irqsave(&clock->lock, flags); + tstart = clock->pps_info.start[i]; + clock->pps_info.start[i] = 0; + write_sequnlock_irqrestore(&clock->lock, flags); + if (!tstart) + continue; + + MLX5_SET(mtpps_reg, in, pin, i); + MLX5_SET64(mtpps_reg, in, time_stamp, tstart); + MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP); + mlx5_set_mtpps(mdev, in, sizeof(in)); + } +} + +static void mlx5_timestamp_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5_core_dev *mdev; + struct mlx5_timer *timer; + struct mlx5_clock *clock; + unsigned long flags; + + timer = container_of(dwork, struct mlx5_timer, overflow_work); + clock = container_of(timer, struct mlx5_clock, timer); + mdev = container_of(clock, struct mlx5_core_dev, clock); + + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + write_seqlock_irqsave(&clock->lock, flags); + timecounter_read(&timer->tc); + mlx5_update_clock_info_page(mdev); + write_sequnlock_irqrestore(&clock->lock, flags); + +out: + schedule_delayed_work(&timer->overflow_work, timer->overflow_period); +} + +static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, + const struct timespec64 *ts) +{ + u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + + if (!mlx5_modify_mtutc_allowed(mdev)) + return 0; + + if (ts->tv_sec < 0 || ts->tv_sec > U32_MAX || + ts->tv_nsec < 0 || ts->tv_nsec > NSEC_PER_SEC) + return -EINVAL; + + MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_SET_TIME_IMMEDIATE); + MLX5_SET(mtutc_reg, in, utc_sec, ts->tv_sec); + MLX5_SET(mtutc_reg, in, utc_nsec, ts->tv_nsec); + + return mlx5_set_mtutc(mdev, in, sizeof(in)); +} + +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_timer *timer = &clock->timer; + struct mlx5_core_dev *mdev; + unsigned long flags; + int err; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + err = mlx5_ptp_settime_real_time(mdev, ts); + if (err) + return err; + + write_seqlock_irqsave(&clock->lock, flags); + timecounter_init(&timer->tc, &timer->cycles, timespec64_to_ns(ts)); + mlx5_update_clock_info_page(mdev); + write_sequnlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static +struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev, + struct ptp_system_timestamp *sts) +{ + struct timespec64 ts; + u64 time; + + time = mlx5_read_time(mdev, sts, true); + ts = ns_to_timespec64(time); + return ts; +} + +static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_timer *timer = &clock->timer; + struct mlx5_core_dev *mdev; + unsigned long flags; + u64 cycles, ns; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + if (mlx5_real_time_mode(mdev)) { + *ts = mlx5_ptp_gettimex_real_time(mdev, sts); + goto out; + } + + write_seqlock_irqsave(&clock->lock, flags); + cycles = mlx5_read_time(mdev, sts, false); + ns = timecounter_cyc2time(&timer->tc, cycles); + write_sequnlock_irqrestore(&clock->lock, flags); + *ts = ns_to_timespec64(ns); +out: + return 0; +} + +static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta) +{ + u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + + if (!mlx5_modify_mtutc_allowed(mdev)) + return 0; + + /* HW time adjustment range is checked. If out of range, settime instead */ + if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) { + struct timespec64 ts; + s64 ns; + + ts = mlx5_ptp_gettimex_real_time(mdev, NULL); + ns = timespec64_to_ns(&ts) + delta; + ts = ns_to_timespec64(ns); + return mlx5_ptp_settime_real_time(mdev, &ts); + } + + MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_TIME); + MLX5_SET(mtutc_reg, in, time_adjustment, delta); + + return mlx5_set_mtutc(mdev, in, sizeof(in)); +} + +static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_timer *timer = &clock->timer; + struct mlx5_core_dev *mdev; + unsigned long flags; + int err; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + err = mlx5_ptp_adjtime_real_time(mdev, delta); + if (err) + return err; + write_seqlock_irqsave(&clock->lock, flags); + timecounter_adjtime(&timer->tc, delta); + mlx5_update_clock_info_page(mdev); + write_sequnlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + return mlx5_ptp_adjtime_real_time(mdev, delta); +} + +static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) +{ + u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + + if (!mlx5_modify_mtutc_allowed(mdev)) + return 0; + + MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC); + + if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units)) { + MLX5_SET(mtutc_reg, in, freq_adj_units, + MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM); + MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm); + } else { + MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_PPB); + MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm_to_ppb(scaled_ppm)); + } + + return mlx5_set_mtutc(mdev, in, sizeof(in)); +} + +static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_timer *timer = &clock->timer; + struct mlx5_core_dev *mdev; + unsigned long flags; + u32 mult; + int err; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); + if (err) + return err; + + mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm); + + write_seqlock_irqsave(&clock->lock, flags); + timecounter_read(&timer->tc); + timer->cycles.mult = mult; + mlx5_update_clock_info_page(mdev); + write_sequnlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_extts_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) + return -EOPNOTSUPP; + + /* Reject requests to enable time stamping on both edges. */ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + + if (rq->extts.index >= clock->ptp_info.n_pins) + return -EINVAL; + + pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; + + if (on) { + pin_mode = MLX5_PIN_MODE_IN; + pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE; + } else { + field_select = MLX5_MTPPS_FS_ENABLE; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns) +{ + struct mlx5_clock *clock = &mdev->clock; + u64 cycles_now, cycles_delta; + u64 nsec_now, nsec_delta; + struct mlx5_timer *timer; + unsigned long flags; + + timer = &clock->timer; + + cycles_now = mlx5_read_time(mdev, NULL, false); + write_seqlock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&timer->tc, cycles_now); + nsec_delta = target_ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << timer->cycles.shift, + timer->cycles.mult); + write_sequnlock_irqrestore(&clock->lock, flags); + + return cycles_now + cycles_delta; +} + +static u64 perout_conf_internal_timer(struct mlx5_core_dev *mdev, s64 sec) +{ + struct timespec64 ts = {}; + s64 target_ns; + + ts.tv_sec = sec; + target_ns = timespec64_to_ns(&ts); + + return find_target_cycles(mdev, target_ns); +} + +static u64 perout_conf_real_time(s64 sec, u32 nsec) +{ + return (u64)nsec | (u64)sec << 32; +} + +static int perout_conf_1pps(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, + u64 *time_stamp, bool real_time) +{ + struct timespec64 ts; + s64 ns; + + ts.tv_nsec = rq->perout.period.nsec; + ts.tv_sec = rq->perout.period.sec; + ns = timespec64_to_ns(&ts); + + if ((ns >> 1) != 500000000LL) + return -EINVAL; + + *time_stamp = real_time ? perout_conf_real_time(rq->perout.start.sec, 0) : + perout_conf_internal_timer(mdev, rq->perout.start.sec); + + return 0; +} + +#define MLX5_MAX_PULSE_DURATION (BIT(__mlx5_bit_sz(mtpps_reg, out_pulse_duration_ns)) - 1) +static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev, + struct ptp_clock_request *rq, + u32 *out_pulse_duration_ns) +{ + struct mlx5_pps *pps_info = &mdev->clock.pps_info; + u32 out_pulse_duration; + struct timespec64 ts; + + if (rq->perout.flags & PTP_PEROUT_DUTY_CYCLE) { + ts.tv_sec = rq->perout.on.sec; + ts.tv_nsec = rq->perout.on.nsec; + out_pulse_duration = (u32)timespec64_to_ns(&ts); + } else { + /* out_pulse_duration_ns should be up to 50% of the + * pulse period as default + */ + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + out_pulse_duration = (u32)timespec64_to_ns(&ts) >> 1; + } + + if (out_pulse_duration < pps_info->min_out_pulse_duration_ns || + out_pulse_duration > MLX5_MAX_PULSE_DURATION) { + mlx5_core_err(mdev, "NPPS pulse duration %u is not in [%llu, %lu]\n", + out_pulse_duration, pps_info->min_out_pulse_duration_ns, + MLX5_MAX_PULSE_DURATION); + return -EINVAL; + } + *out_pulse_duration_ns = out_pulse_duration; + + return 0; +} + +static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, + u32 *field_select, u32 *out_pulse_duration_ns, + u64 *period, u64 *time_stamp) +{ + struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct ptp_clock_time *time = &rq->perout.start; + struct timespec64 ts; + + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + if (timespec64_to_ns(&ts) < pps_info->min_npps_period) { + mlx5_core_err(mdev, "NPPS period is lower than minimal npps period %llu\n", + pps_info->min_npps_period); + return -EINVAL; + } + *period = perout_conf_real_time(rq->perout.period.sec, rq->perout.period.nsec); + + if (mlx5_perout_conf_out_pulse_duration(mdev, rq, out_pulse_duration_ns)) + return -EINVAL; + + *time_stamp = perout_conf_real_time(time->sec, time->nsec); + *field_select |= MLX5_MTPPS_FS_NPPS_PERIOD | + MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS; + + return 0; +} + +static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags) +{ + return ((!mlx5_npps_real_time_supported(mdev) && flags) || + (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE)); +} + +static int mlx5_perout_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + bool rt_mode = mlx5_real_time_mode(mdev); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u32 out_pulse_duration_ns = 0; + u32 field_select = 0; + u64 npps_period = 0; + u64 time_stamp = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + /* Reject requests with unsupported flags */ + if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) + return -EOPNOTSUPP; + + if (rq->perout.index >= clock->ptp_info.n_pins) + return -EINVAL; + + field_select = MLX5_MTPPS_FS_ENABLE; + pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index); + if (pin < 0) + return -EBUSY; + + if (on) { + bool rt_mode = mlx5_real_time_mode(mdev); + + pin_mode = MLX5_PIN_MODE_OUT; + pattern = MLX5_OUT_PATTERN_PERIODIC; + + if (rt_mode && rq->perout.start.sec > U32_MAX) + return -EINVAL; + + field_select |= MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_TIME_STAMP; + + if (mlx5_npps_real_time_supported(mdev)) + err = perout_conf_npps_real_time(mdev, rq, &field_select, + &out_pulse_duration_ns, &npps_period, + &time_stamp); + else + err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode); + if (err) + return err; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + MLX5_SET(mtpps_reg, in, field_select, field_select); + MLX5_SET64(mtpps_reg, in, npps_period, npps_period); + MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns); + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + if (rt_mode) + return 0; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + + clock->pps_info.enabled = !!on; + return 0; +} + +static int mlx5_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + return mlx5_extts_configure(ptp, rq, on); + case PTP_CLK_REQ_PEROUT: + return mlx5_perout_configure(ptp, rq, on); + case PTP_CLK_REQ_PPS: + return mlx5_pps_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + return 0; +} + +enum { + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0), + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1), +}; + +static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + + switch (func) { + case PTP_PF_NONE: + return 0; + case PTP_PF_EXTTS: + return !(clock->pps_info.pin_caps[pin] & + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN); + case PTP_PF_PEROUT: + return !(clock->pps_info.pin_caps[pin] & + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT); + default: + return -EOPNOTSUPP; + } +} + +static const struct ptp_clock_info mlx5_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlx5_ptp", + .max_adj = 50000000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 0, + .adjfine = mlx5_ptp_adjfine, + .adjphase = mlx5_ptp_adjphase, + .getmaxphase = mlx5_ptp_getmaxphase, + .adjtime = mlx5_ptp_adjtime, + .gettimex64 = mlx5_ptp_gettimex, + .settime64 = mlx5_ptp_settime, + .enable = NULL, + .verify = NULL, +}; + +static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, + u32 *mtpps, u32 mtpps_size) +{ + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {}; + + MLX5_SET(mtpps_reg, in, pin, pin); + + return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps, + mtpps_size, MLX5_REG_MTPPS, 0, 0); +} + +static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) +{ + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); + + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; + u8 mode; + int err; + + err = mlx5_query_mtpps_pin_mode(mdev, pin, out, sizeof(out)); + if (err || !MLX5_GET(mtpps_reg, out, enable)) + return PTP_PF_NONE; + + mode = MLX5_GET(mtpps_reg, out, pin_mode); + + if (mode == MLX5_PIN_MODE_IN) + return PTP_PF_EXTTS; + else if (mode == MLX5_PIN_MODE_OUT) + return PTP_PF_PEROUT; + + return PTP_PF_NONE; +} + +static void mlx5_init_pin_config(struct mlx5_clock *clock) +{ + int i; + + if (!clock->ptp_info.n_pins) + return; + + clock->ptp_info.pin_config = + kcalloc(clock->ptp_info.n_pins, + sizeof(*clock->ptp_info.pin_config), + GFP_KERNEL); + if (!clock->ptp_info.pin_config) + return; + clock->ptp_info.enable = mlx5_ptp_enable; + clock->ptp_info.verify = mlx5_ptp_verify; + clock->ptp_info.pps = 1; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + snprintf(clock->ptp_info.pin_config[i].name, + sizeof(clock->ptp_info.pin_config[i].name), + "mlx5_pps%d", i); + clock->ptp_info.pin_config[i].index = i; + clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); + clock->ptp_info.pin_config[i].chan = 0; + } +} + +static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + mlx5_query_mtpps(mdev, out, sizeof(out)); + + clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, + cap_number_of_pps_pins); + clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_in_pins); + clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); + + if (MLX5_CAP_MCAM_FEATURE(mdev, npps_period)) + clock->pps_info.min_npps_period = 1 << MLX5_GET(mtpps_reg, out, + cap_log_min_npps_period); + if (MLX5_CAP_MCAM_FEATURE(mdev, out_pulse_duration_ns)) + clock->pps_info.min_out_pulse_duration_ns = 1 << MLX5_GET(mtpps_reg, out, + cap_log_min_out_pulse_duration_ns); + + clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); +} + +static void ts_next_sec(struct timespec64 *ts) +{ + ts->tv_sec += 1; + ts->tv_nsec = 0; +} + +static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev, + struct mlx5_clock *clock) +{ + struct timespec64 ts; + s64 target_ns; + + mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL); + ts_next_sec(&ts); + target_ns = timespec64_to_ns(&ts); + + return find_target_cycles(mdev, target_ns); +} + +static int mlx5_pps_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct ptp_clock_event ptp_event; + struct mlx5_eqe *eqe = data; + int pin = eqe->data.pps.pin; + struct mlx5_core_dev *mdev; + unsigned long flags; + u64 ns; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + switch (clock->ptp_info.pin_config[pin].func) { + case PTP_PF_EXTTS: + ptp_event.index = pin; + ptp_event.timestamp = mlx5_real_time_mode(mdev) ? + mlx5_real_time_cyc2time(clock, + be64_to_cpu(eqe->data.pps.time_stamp)) : + mlx5_timecounter_cyc2time(clock, + be64_to_cpu(eqe->data.pps.time_stamp)); + if (clock->pps_info.enabled) { + ptp_event.type = PTP_CLOCK_PPSUSR; + ptp_event.pps_times.ts_real = + ns_to_timespec64(ptp_event.timestamp); + } else { + ptp_event.type = PTP_CLOCK_EXTTS; + } + /* TODOL clock->ptp can be NULL if ptp_clock_register fails */ + ptp_clock_event(clock->ptp, &ptp_event); + break; + case PTP_PF_PEROUT: + ns = perout_conf_next_event_timer(mdev, clock); + write_seqlock_irqsave(&clock->lock, flags); + clock->pps_info.start[pin] = ns; + write_sequnlock_irqrestore(&clock->lock, flags); + schedule_work(&clock->pps_info.out_work); + break; + default: + mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", + clock->ptp_info.pin_config[pin].func); + } + + return NOTIFY_OK; +} + +static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + struct mlx5_timer *timer = &clock->timer; + u32 dev_freq; + + dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); + timer->cycles.read = read_internal_timer; + timer->cycles.shift = mlx5_ptp_shift_constant(dev_freq); + timer->cycles.mult = clocksource_khz2mult(dev_freq, + timer->cycles.shift); + timer->nominal_c_mult = timer->cycles.mult; + timer->cycles.mask = CLOCKSOURCE_MASK(41); + + timecounter_init(&timer->tc, &timer->cycles, + ktime_to_ns(ktime_get_real())); +} + +static void mlx5_init_overflow_period(struct mlx5_clock *clock) +{ + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); + struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_timer *timer = &clock->timer; + u64 overflow_cycles; + u64 frac = 0; + u64 ns; + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least twice every wrap around. + * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. + */ + overflow_cycles = div64_u64(~0ULL >> 1, timer->cycles.mult); + overflow_cycles = min(overflow_cycles, div_u64(timer->cycles.mask, 3)); + + ns = cyclecounter_cyc2ns(&timer->cycles, overflow_cycles, + frac, &frac); + do_div(ns, NSEC_PER_SEC / HZ); + timer->overflow_period = ns; + + INIT_DELAYED_WORK(&timer->overflow_work, mlx5_timestamp_overflow); + if (timer->overflow_period) + schedule_delayed_work(&timer->overflow_work, 0); + else + mlx5_core_warn(mdev, + "invalid overflow period, overflow_work is not scheduled\n"); + + if (clock_info) + clock_info->overflow_period = timer->overflow_period; +} + +static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + struct mlx5_ib_clock_info *info; + struct mlx5_timer *timer; + + mdev->clock_info = (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL); + if (!mdev->clock_info) { + mlx5_core_warn(mdev, "Failed to allocate IB clock info page\n"); + return; + } + + info = mdev->clock_info; + timer = &clock->timer; + + info->nsec = timer->tc.nsec; + info->cycles = timer->tc.cycle_last; + info->mask = timer->cycles.mask; + info->mult = timer->nominal_c_mult; + info->shift = timer->cycles.shift; + info->frac = timer->tc.frac; +} + +static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + mlx5_timecounter_init(mdev); + mlx5_init_clock_info(mdev); + mlx5_init_overflow_period(clock); + clock->ptp_info = mlx5_ptp_clock_info; + + if (mlx5_real_time_mode(mdev)) { + struct timespec64 ts; + + ktime_get_real_ts64(&ts); + mlx5_ptp_settime(&clock->ptp_info, &ts); + } +} + +static void mlx5_init_pps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_PPS_CAP(mdev)) + return; + + mlx5_get_pps_caps(mdev); + mlx5_init_pin_config(clock); +} + +void mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return; + } + + seqlock_init(&clock->lock); + mlx5_init_timer_clock(mdev); + INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); + + /* Configure the PHC */ + clock->ptp_info = mlx5_ptp_clock_info; + + /* Initialize 1PPS data structures */ + mlx5_init_pps(mdev); + + clock->ptp = ptp_clock_register(&clock->ptp_info, + &mdev->pdev->dev); + if (IS_ERR(clock->ptp)) { + mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + PTR_ERR(clock->ptp)); + clock->ptp = NULL; + } + + MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &clock->pps_nb); +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); + if (clock->ptp) { + ptp_clock_unregister(clock->ptp); + clock->ptp = NULL; + } + + cancel_work_sync(&clock->pps_info.out_work); + cancel_delayed_work_sync(&clock->timer.overflow_work); + + if (mdev->clock_info) { + free_page((unsigned long)mdev->clock_info); + mdev->clock_info = NULL; + } + + kfree(clock->ptp_info.pin_config); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h new file mode 100644 index 0000000000..bd95b9f8d1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_CLOCK_H__ +#define __LIB_CLOCK_H__ + +static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev) +{ + u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format); + + return (rq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME || + rq_ts_format_cap == + MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME); +} + +static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev) +{ + u8 sq_ts_format_cap = MLX5_CAP_GEN(mdev, sq_ts_format); + + return (sq_ts_format_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME || + sq_ts_format_cap == + MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME); +} + +typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64); + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +void mlx5_init_clock(struct mlx5_core_dev *mdev); +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); + +static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) +{ + return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1; +} + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + struct mlx5_timer *timer = &clock->timer; + unsigned int seq; + u64 nsec; + + do { + seq = read_seqbegin(&clock->lock); + nsec = timecounter_cyc2time(&timer->tc, timestamp); + } while (read_seqretry(&clock->lock, seq)); + + return ns_to_ktime(nsec); +} + +#define REAL_TIME_TO_NS(hi, low) (((u64)hi) * NSEC_PER_SEC + ((u64)low)) + +static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + u64 time = REAL_TIME_TO_NS(timestamp >> 32, timestamp & 0xFFFFFFFF); + + return ns_to_ktime(time); +} +#else +static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} +static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} +static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) +{ + return -1; +} + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + return 0; +} + +static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + return 0; +} +#endif + +static inline cqe_ts_to_ns mlx5_rq_ts_translator(struct mlx5_core_dev *mdev) +{ + return mlx5_is_real_time_rq(mdev) ? mlx5_real_time_cyc2time : + mlx5_timecounter_cyc2time; +} + +static inline cqe_ts_to_ns mlx5_sq_ts_translator(struct mlx5_core_dev *mdev) +{ + return mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time : + mlx5_timecounter_cyc2time; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c new file mode 100644 index 0000000000..3a94b8f803 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -0,0 +1,774 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include "mlx5_core.h" +#include "lib/crypto.h" + +#define MLX5_CRYPTO_DEK_POOLS_NUM (MLX5_ACCEL_OBJ_TYPE_KEY_NUM - 1) +#define type2idx(type) ((type) - 1) + +#define MLX5_CRYPTO_DEK_POOL_SYNC_THRESH 128 + +/* calculate the num of DEKs, which are freed by any user + * (for example, TLS) after last revalidation in a pool or a bulk. + */ +#define MLX5_CRYPTO_DEK_CALC_FREED(a) \ + ({ typeof(a) _a = (a); \ + _a->num_deks - _a->avail_deks - _a->in_use_deks; }) + +#define MLX5_CRYPTO_DEK_POOL_CALC_FREED(pool) MLX5_CRYPTO_DEK_CALC_FREED(pool) +#define MLX5_CRYPTO_DEK_BULK_CALC_FREED(bulk) MLX5_CRYPTO_DEK_CALC_FREED(bulk) + +#define MLX5_CRYPTO_DEK_BULK_IDLE(bulk) \ + ({ typeof(bulk) _bulk = (bulk); \ + _bulk->avail_deks == _bulk->num_deks; }) + +enum { + MLX5_CRYPTO_DEK_ALL_TYPE = BIT(0), +}; + +struct mlx5_crypto_dek_pool { + struct mlx5_core_dev *mdev; + u32 key_purpose; + int num_deks; /* the total number of keys in this pool */ + int avail_deks; /* the number of available keys in this pool */ + int in_use_deks; /* the number of being used keys in this pool */ + struct mutex lock; /* protect the following lists, and the bulks */ + struct list_head partial_list; /* some of keys are available */ + struct list_head full_list; /* no available keys */ + struct list_head avail_list; /* all keys are available to use */ + + /* No in-used keys, and all need to be synced. + * These bulks will be put to avail list after sync. + */ + struct list_head sync_list; + + bool syncing; + struct list_head wait_for_free; + struct work_struct sync_work; + + spinlock_t destroy_lock; /* protect destroy_list */ + struct list_head destroy_list; + struct work_struct destroy_work; +}; + +struct mlx5_crypto_dek_bulk { + struct mlx5_core_dev *mdev; + int base_obj_id; + int avail_start; /* the bit to start search */ + int num_deks; /* the total number of keys in a bulk */ + int avail_deks; /* the number of keys available, with need_sync bit 0 */ + int in_use_deks; /* the number of keys being used, with in_use bit 1 */ + struct list_head entry; + + /* 0: not being used by any user, 1: otherwise */ + unsigned long *in_use; + + /* The bits are set when they are used, and reset after crypto_sync + * is executed. So, the value 0 means the key is newly created, or not + * used after sync, and 1 means it is in use, or freed but not synced + */ + unsigned long *need_sync; +}; + +struct mlx5_crypto_dek_priv { + struct mlx5_core_dev *mdev; + int log_dek_obj_range; +}; + +struct mlx5_crypto_dek { + struct mlx5_crypto_dek_bulk *bulk; + struct list_head entry; + u32 obj_id; +}; + +u32 mlx5_crypto_dek_get_id(struct mlx5_crypto_dek *dek) +{ + return dek->obj_id; +} + +static int mlx5_crypto_dek_get_key_sz(struct mlx5_core_dev *mdev, + u32 sz_bytes, u8 *key_sz_p) +{ + u32 sz_bits = sz_bytes * BITS_PER_BYTE; + + switch (sz_bits) { + case 128: + *key_sz_p = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; + break; + case 256: + *key_sz_p = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256; + break; + default: + mlx5_core_err(mdev, "Crypto offload error, invalid key size (%u bits)\n", + sz_bits); + return -EINVAL; + } + + return 0; +} + +static int mlx5_crypto_dek_fill_key(struct mlx5_core_dev *mdev, u8 *key_obj, + const void *key, u32 sz_bytes) +{ + void *dst; + u8 key_sz; + int err; + + err = mlx5_crypto_dek_get_key_sz(mdev, sz_bytes, &key_sz); + if (err) + return err; + + MLX5_SET(encryption_key_obj, key_obj, key_size, key_sz); + + if (sz_bytes == 16) + /* For key size of 128b the MSBs are reserved. */ + dst = MLX5_ADDR_OF(encryption_key_obj, key_obj, key[1]); + else + dst = MLX5_ADDR_OF(encryption_key_obj, key_obj, key); + + memcpy(dst, key, sz_bytes); + + return 0; +} + +static int mlx5_crypto_cmd_sync_crypto(struct mlx5_core_dev *mdev, + int crypto_type) +{ + u32 in[MLX5_ST_SZ_DW(sync_crypto_in)] = {}; + int err; + + mlx5_core_dbg(mdev, + "Execute SYNC_CRYPTO command with crypto_type(0x%x)\n", + crypto_type); + + MLX5_SET(sync_crypto_in, in, opcode, MLX5_CMD_OP_SYNC_CRYPTO); + MLX5_SET(sync_crypto_in, in, crypto_type, crypto_type); + + err = mlx5_cmd_exec_in(mdev, sync_crypto, in); + if (err) + mlx5_core_err(mdev, + "Failed to exec sync crypto, type=%d, err=%d\n", + crypto_type, err); + + return err; +} + +static int mlx5_crypto_create_dek_bulk(struct mlx5_core_dev *mdev, + u32 key_purpose, int log_obj_range, + u32 *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *obj, *param; + int err; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param); + MLX5_SET(general_obj_create_param, param, log_obj_range, log_obj_range); + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + mlx5_core_dbg(mdev, "DEK objects created, bulk=%d, obj_id=%d\n", + 1 << log_obj_range, *obj_id); + + return 0; +} + +static int mlx5_crypto_modify_dek_key(struct mlx5_core_dev *mdev, + const void *key, u32 sz_bytes, u32 key_purpose, + u32 obj_id, u32 obj_offset) +{ + u32 in[MLX5_ST_SZ_DW(modify_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *obj, *param; + int err; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param); + MLX5_SET(general_obj_query_param, param, obj_offset, obj_offset); + + obj = MLX5_ADDR_OF(modify_encryption_key_in, in, encryption_key_object); + MLX5_SET64(encryption_key_obj, obj, modify_field_select, 1); + MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn); + + err = mlx5_crypto_dek_fill_key(mdev, obj, key, sz_bytes); + if (err) + return err; + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + + /* avoid leaking key on the stack */ + memzero_explicit(in, sizeof(in)); + + return err; +} + +static int mlx5_crypto_create_dek_key(struct mlx5_core_dev *mdev, + const void *key, u32 sz_bytes, + u32 key_purpose, u32 *p_key_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u64 general_obj_types; + void *obj; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY)) + return -EINVAL; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn); + + err = mlx5_crypto_dek_fill_key(mdev, obj, key, sz_bytes); + if (err) + return err; + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + /* avoid leaking key on the stack */ + memzero_explicit(in, sizeof(in)); + + return err; +} + +static void mlx5_crypto_destroy_dek_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + const void *key, u32 sz_bytes, + u32 key_type, u32 *p_key_id) +{ + return mlx5_crypto_create_dek_key(mdev, key, sz_bytes, key_type, p_key_id); +} + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_crypto_destroy_dek_key(mdev, key_id); +} + +static struct mlx5_crypto_dek_bulk * +mlx5_crypto_dek_bulk_create(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek_priv *dek_priv = pool->mdev->mlx5e_res.dek_priv; + struct mlx5_core_dev *mdev = pool->mdev; + struct mlx5_crypto_dek_bulk *bulk; + int num_deks, base_obj_id; + int err; + + bulk = kzalloc(sizeof(*bulk), GFP_KERNEL); + if (!bulk) + return ERR_PTR(-ENOMEM); + + num_deks = 1 << dek_priv->log_dek_obj_range; + bulk->need_sync = bitmap_zalloc(num_deks, GFP_KERNEL); + if (!bulk->need_sync) { + err = -ENOMEM; + goto err_out; + } + + bulk->in_use = bitmap_zalloc(num_deks, GFP_KERNEL); + if (!bulk->in_use) { + err = -ENOMEM; + goto err_out; + } + + err = mlx5_crypto_create_dek_bulk(mdev, pool->key_purpose, + dek_priv->log_dek_obj_range, + &base_obj_id); + if (err) + goto err_out; + + bulk->base_obj_id = base_obj_id; + bulk->num_deks = num_deks; + bulk->avail_deks = num_deks; + bulk->mdev = mdev; + + return bulk; + +err_out: + bitmap_free(bulk->in_use); + bitmap_free(bulk->need_sync); + kfree(bulk); + return ERR_PTR(err); +} + +static struct mlx5_crypto_dek_bulk * +mlx5_crypto_dek_pool_add_bulk(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek_bulk *bulk; + + bulk = mlx5_crypto_dek_bulk_create(pool); + if (IS_ERR(bulk)) + return bulk; + + pool->avail_deks += bulk->num_deks; + pool->num_deks += bulk->num_deks; + list_add(&bulk->entry, &pool->partial_list); + + return bulk; +} + +static void mlx5_crypto_dek_bulk_free(struct mlx5_crypto_dek_bulk *bulk) +{ + mlx5_crypto_destroy_dek_key(bulk->mdev, bulk->base_obj_id); + bitmap_free(bulk->need_sync); + bitmap_free(bulk->in_use); + kfree(bulk); +} + +static void mlx5_crypto_dek_pool_remove_bulk(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek_bulk *bulk, + bool delay) +{ + pool->num_deks -= bulk->num_deks; + pool->avail_deks -= bulk->avail_deks; + pool->in_use_deks -= bulk->in_use_deks; + list_del(&bulk->entry); + if (!delay) + mlx5_crypto_dek_bulk_free(bulk); +} + +static struct mlx5_crypto_dek_bulk * +mlx5_crypto_dek_pool_pop(struct mlx5_crypto_dek_pool *pool, u32 *obj_offset) +{ + struct mlx5_crypto_dek_bulk *bulk; + int pos; + + mutex_lock(&pool->lock); + bulk = list_first_entry_or_null(&pool->partial_list, + struct mlx5_crypto_dek_bulk, entry); + + if (bulk) { + pos = find_next_zero_bit(bulk->need_sync, bulk->num_deks, + bulk->avail_start); + if (pos == bulk->num_deks) { + mlx5_core_err(pool->mdev, "Wrong DEK bulk avail_start.\n"); + pos = find_first_zero_bit(bulk->need_sync, bulk->num_deks); + } + WARN_ON(pos == bulk->num_deks); + } else { + bulk = list_first_entry_or_null(&pool->avail_list, + struct mlx5_crypto_dek_bulk, + entry); + if (bulk) { + list_move(&bulk->entry, &pool->partial_list); + } else { + bulk = mlx5_crypto_dek_pool_add_bulk(pool); + if (IS_ERR(bulk)) + goto out; + } + pos = 0; + } + + *obj_offset = pos; + bitmap_set(bulk->need_sync, pos, 1); + bitmap_set(bulk->in_use, pos, 1); + bulk->in_use_deks++; + bulk->avail_deks--; + if (!bulk->avail_deks) { + list_move(&bulk->entry, &pool->full_list); + bulk->avail_start = bulk->num_deks; + } else { + bulk->avail_start = pos + 1; + } + pool->avail_deks--; + pool->in_use_deks++; + +out: + mutex_unlock(&pool->lock); + return bulk; +} + +static bool mlx5_crypto_dek_need_sync(struct mlx5_crypto_dek_pool *pool) +{ + return !pool->syncing && + MLX5_CRYPTO_DEK_POOL_CALC_FREED(pool) > MLX5_CRYPTO_DEK_POOL_SYNC_THRESH; +} + +static int mlx5_crypto_dek_free_locked(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek *dek) +{ + struct mlx5_crypto_dek_bulk *bulk = dek->bulk; + int obj_offset; + bool old_val; + int err = 0; + + obj_offset = dek->obj_id - bulk->base_obj_id; + old_val = test_and_clear_bit(obj_offset, bulk->in_use); + WARN_ON_ONCE(!old_val); + if (!old_val) { + err = -ENOENT; + goto out_free; + } + pool->in_use_deks--; + bulk->in_use_deks--; + if (!bulk->avail_deks && !bulk->in_use_deks) + list_move(&bulk->entry, &pool->sync_list); + + if (mlx5_crypto_dek_need_sync(pool) && schedule_work(&pool->sync_work)) + pool->syncing = true; + +out_free: + kfree(dek); + return err; +} + +static int mlx5_crypto_dek_pool_push(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek *dek) +{ + int err = 0; + + mutex_lock(&pool->lock); + if (pool->syncing) + list_add(&dek->entry, &pool->wait_for_free); + else + err = mlx5_crypto_dek_free_locked(pool, dek); + mutex_unlock(&pool->lock); + + return err; +} + +/* Update the bits for a bulk while sync, and avail_next for search. + * As the combinations of (need_sync, in_use) of one DEK are + * - (0,0) means the key is ready for use, + * - (1,1) means the key is currently being used by a user, + * - (1,0) means the key is freed, and waiting for being synced, + * - (0,1) is invalid state. + * the number of revalidated DEKs can be calculated by + * hweight_long(need_sync XOR in_use), and the need_sync bits can be reset + * by simply copying from in_use bits. + */ +static void mlx5_crypto_dek_bulk_reset_synced(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek_bulk *bulk) +{ + unsigned long *need_sync = bulk->need_sync; + unsigned long *in_use = bulk->in_use; + int i, freed, reused, avail_next; + bool first = true; + + freed = MLX5_CRYPTO_DEK_BULK_CALC_FREED(bulk); + + for (i = 0; freed && i < BITS_TO_LONGS(bulk->num_deks); + i++, need_sync++, in_use++) { + reused = hweight_long((*need_sync) ^ (*in_use)); + if (!reused) + continue; + + bulk->avail_deks += reused; + pool->avail_deks += reused; + *need_sync = *in_use; + if (first) { + avail_next = i * BITS_PER_TYPE(long); + if (bulk->avail_start > avail_next) + bulk->avail_start = avail_next; + first = false; + } + + freed -= reused; + } +} + +/* Return true if the bulk is reused, false if destroyed with delay */ +static bool mlx5_crypto_dek_bulk_handle_avail(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek_bulk *bulk, + struct list_head *destroy_list) +{ + if (list_empty(&pool->avail_list)) { + list_move(&bulk->entry, &pool->avail_list); + return true; + } + + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, true); + list_add(&bulk->entry, destroy_list); + return false; +} + +static void mlx5_crypto_dek_pool_splice_destroy_list(struct mlx5_crypto_dek_pool *pool, + struct list_head *list, + struct list_head *head) +{ + spin_lock(&pool->destroy_lock); + list_splice_init(list, head); + spin_unlock(&pool->destroy_lock); +} + +static void mlx5_crypto_dek_pool_free_wait_keys(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek *dek, *next; + + list_for_each_entry_safe(dek, next, &pool->wait_for_free, entry) { + list_del(&dek->entry); + mlx5_crypto_dek_free_locked(pool, dek); + } +} + +/* For all the bulks in each list, reset the bits while sync. + * Move them to different lists according to the number of available DEKs. + * Destrory all the idle bulks, except one for quick service. + * And free DEKs in the waiting list at the end of this func. + */ +static void mlx5_crypto_dek_pool_reset_synced(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek_bulk *bulk, *tmp; + LIST_HEAD(destroy_list); + + list_for_each_entry_safe(bulk, tmp, &pool->partial_list, entry) { + mlx5_crypto_dek_bulk_reset_synced(pool, bulk); + if (MLX5_CRYPTO_DEK_BULK_IDLE(bulk)) + mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list); + } + + list_for_each_entry_safe(bulk, tmp, &pool->full_list, entry) { + mlx5_crypto_dek_bulk_reset_synced(pool, bulk); + + if (!bulk->avail_deks) + continue; + + if (MLX5_CRYPTO_DEK_BULK_IDLE(bulk)) + mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list); + else + list_move(&bulk->entry, &pool->partial_list); + } + + list_for_each_entry_safe(bulk, tmp, &pool->sync_list, entry) { + bulk->avail_deks = bulk->num_deks; + pool->avail_deks += bulk->num_deks; + if (mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list)) { + memset(bulk->need_sync, 0, BITS_TO_BYTES(bulk->num_deks)); + bulk->avail_start = 0; + } + } + + mlx5_crypto_dek_pool_free_wait_keys(pool); + + if (!list_empty(&destroy_list)) { + mlx5_crypto_dek_pool_splice_destroy_list(pool, &destroy_list, + &pool->destroy_list); + schedule_work(&pool->destroy_work); + } +} + +static void mlx5_crypto_dek_sync_work_fn(struct work_struct *work) +{ + struct mlx5_crypto_dek_pool *pool = + container_of(work, struct mlx5_crypto_dek_pool, sync_work); + int err; + + err = mlx5_crypto_cmd_sync_crypto(pool->mdev, BIT(pool->key_purpose)); + mutex_lock(&pool->lock); + if (!err) + mlx5_crypto_dek_pool_reset_synced(pool); + pool->syncing = false; + mutex_unlock(&pool->lock); +} + +struct mlx5_crypto_dek *mlx5_crypto_dek_create(struct mlx5_crypto_dek_pool *dek_pool, + const void *key, u32 sz_bytes) +{ + struct mlx5_crypto_dek_priv *dek_priv = dek_pool->mdev->mlx5e_res.dek_priv; + struct mlx5_core_dev *mdev = dek_pool->mdev; + u32 key_purpose = dek_pool->key_purpose; + struct mlx5_crypto_dek_bulk *bulk; + struct mlx5_crypto_dek *dek; + int obj_offset; + int err; + + dek = kzalloc(sizeof(*dek), GFP_KERNEL); + if (!dek) + return ERR_PTR(-ENOMEM); + + if (!dek_priv) { + err = mlx5_crypto_create_dek_key(mdev, key, sz_bytes, + key_purpose, &dek->obj_id); + goto out; + } + + bulk = mlx5_crypto_dek_pool_pop(dek_pool, &obj_offset); + if (IS_ERR(bulk)) { + err = PTR_ERR(bulk); + goto out; + } + + dek->bulk = bulk; + dek->obj_id = bulk->base_obj_id + obj_offset; + err = mlx5_crypto_modify_dek_key(mdev, key, sz_bytes, key_purpose, + bulk->base_obj_id, obj_offset); + if (err) { + mlx5_crypto_dek_pool_push(dek_pool, dek); + return ERR_PTR(err); + } + +out: + if (err) { + kfree(dek); + return ERR_PTR(err); + } + + return dek; +} + +void mlx5_crypto_dek_destroy(struct mlx5_crypto_dek_pool *dek_pool, + struct mlx5_crypto_dek *dek) +{ + struct mlx5_crypto_dek_priv *dek_priv = dek_pool->mdev->mlx5e_res.dek_priv; + struct mlx5_core_dev *mdev = dek_pool->mdev; + + if (!dek_priv) { + mlx5_crypto_destroy_dek_key(mdev, dek->obj_id); + kfree(dek); + } else { + mlx5_crypto_dek_pool_push(dek_pool, dek); + } +} + +static void mlx5_crypto_dek_free_destroy_list(struct list_head *destroy_list) +{ + struct mlx5_crypto_dek_bulk *bulk, *tmp; + + list_for_each_entry_safe(bulk, tmp, destroy_list, entry) + mlx5_crypto_dek_bulk_free(bulk); +} + +static void mlx5_crypto_dek_destroy_work_fn(struct work_struct *work) +{ + struct mlx5_crypto_dek_pool *pool = + container_of(work, struct mlx5_crypto_dek_pool, destroy_work); + LIST_HEAD(destroy_list); + + mlx5_crypto_dek_pool_splice_destroy_list(pool, &pool->destroy_list, + &destroy_list); + mlx5_crypto_dek_free_destroy_list(&destroy_list); +} + +struct mlx5_crypto_dek_pool * +mlx5_crypto_dek_pool_create(struct mlx5_core_dev *mdev, int key_purpose) +{ + struct mlx5_crypto_dek_pool *pool; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + pool->mdev = mdev; + pool->key_purpose = key_purpose; + + mutex_init(&pool->lock); + INIT_LIST_HEAD(&pool->avail_list); + INIT_LIST_HEAD(&pool->partial_list); + INIT_LIST_HEAD(&pool->full_list); + INIT_LIST_HEAD(&pool->sync_list); + INIT_LIST_HEAD(&pool->wait_for_free); + INIT_WORK(&pool->sync_work, mlx5_crypto_dek_sync_work_fn); + spin_lock_init(&pool->destroy_lock); + INIT_LIST_HEAD(&pool->destroy_list); + INIT_WORK(&pool->destroy_work, mlx5_crypto_dek_destroy_work_fn); + + return pool; +} + +void mlx5_crypto_dek_pool_destroy(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek_bulk *bulk, *tmp; + + cancel_work_sync(&pool->sync_work); + cancel_work_sync(&pool->destroy_work); + + mlx5_crypto_dek_pool_free_wait_keys(pool); + + list_for_each_entry_safe(bulk, tmp, &pool->avail_list, entry) + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false); + + list_for_each_entry_safe(bulk, tmp, &pool->full_list, entry) + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false); + + list_for_each_entry_safe(bulk, tmp, &pool->sync_list, entry) + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false); + + list_for_each_entry_safe(bulk, tmp, &pool->partial_list, entry) + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false); + + mlx5_crypto_dek_free_destroy_list(&pool->destroy_list); + + mutex_destroy(&pool->lock); + + kfree(pool); +} + +void mlx5_crypto_dek_cleanup(struct mlx5_crypto_dek_priv *dek_priv) +{ + if (!dek_priv) + return; + + kfree(dek_priv); +} + +struct mlx5_crypto_dek_priv *mlx5_crypto_dek_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_crypto_dek_priv *dek_priv; + int err; + + if (!MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc)) + return NULL; + + dek_priv = kzalloc(sizeof(*dek_priv), GFP_KERNEL); + if (!dek_priv) + return ERR_PTR(-ENOMEM); + + dek_priv->mdev = mdev; + dek_priv->log_dek_obj_range = min_t(int, 12, + MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc)); + + /* sync all types of objects */ + err = mlx5_crypto_cmd_sync_crypto(mdev, MLX5_CRYPTO_DEK_ALL_TYPE); + if (err) + goto err_sync_crypto; + + mlx5_core_dbg(mdev, "Crypto DEK enabled, %d deks per alloc (max %d), total %d\n", + 1 << dek_priv->log_dek_obj_range, + 1 << MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc), + 1 << MLX5_CAP_CRYPTO(mdev, log_max_num_deks)); + + return dek_priv; + +err_sync_crypto: + kfree(dek_priv); + return ERR_PTR(err); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h new file mode 100644 index 0000000000..c819c047bb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LIB_CRYPTO_H__ +#define __MLX5_LIB_CRYPTO_H__ + +enum { + MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS, + MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC, + MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC, + MLX5_ACCEL_OBJ_TYPE_KEY_NUM, +}; + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + const void *key, u32 sz_bytes, + u32 key_type, u32 *p_key_id); + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); + +struct mlx5_crypto_dek_pool; +struct mlx5_crypto_dek; + +struct mlx5_crypto_dek_pool *mlx5_crypto_dek_pool_create(struct mlx5_core_dev *mdev, + int key_purpose); +void mlx5_crypto_dek_pool_destroy(struct mlx5_crypto_dek_pool *pool); +struct mlx5_crypto_dek *mlx5_crypto_dek_create(struct mlx5_crypto_dek_pool *dek_pool, + const void *key, u32 sz_bytes); +void mlx5_crypto_dek_destroy(struct mlx5_crypto_dek_pool *dek_pool, + struct mlx5_crypto_dek *dek); +u32 mlx5_crypto_dek_get_id(struct mlx5_crypto_dek *dek); + +struct mlx5_crypto_dek_priv *mlx5_crypto_dek_init(struct mlx5_core_dev *mdev); +void mlx5_crypto_dek_cleanup(struct mlx5_crypto_dek_priv *dek_priv); +#endif /* __MLX5_LIB_CRYPTO_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c new file mode 100644 index 0000000000..00e67910e3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include +#include +#include "lib/devcom.h" +#include "mlx5_core.h" + +static LIST_HEAD(devcom_dev_list); +static LIST_HEAD(devcom_comp_list); +/* protect device list */ +static DEFINE_MUTEX(dev_list_lock); +/* protect component list */ +static DEFINE_MUTEX(comp_list_lock); + +#define devcom_for_each_component(iter) \ + list_for_each_entry(iter, &devcom_comp_list, comp_list) + +struct mlx5_devcom_dev { + struct list_head list; + struct mlx5_core_dev *dev; + struct kref ref; +}; + +struct mlx5_devcom_comp { + struct list_head comp_list; + enum mlx5_devcom_component id; + u64 key; + struct list_head comp_dev_list_head; + mlx5_devcom_event_handler_t handler; + struct kref ref; + bool ready; + struct rw_semaphore sem; +}; + +struct mlx5_devcom_comp_dev { + struct list_head list; + struct mlx5_devcom_comp *comp; + struct mlx5_devcom_dev *devc; + void __rcu *data; +}; + +static bool devcom_dev_exists(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_dev *iter; + + list_for_each_entry(iter, &devcom_dev_list, list) + if (iter->dev == dev) + return true; + + return false; +} + +static struct mlx5_devcom_dev * +mlx5_devcom_dev_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_dev *devc; + + devc = kzalloc(sizeof(*devc), GFP_KERNEL); + if (!devc) + return NULL; + + devc->dev = dev; + kref_init(&devc->ref); + return devc; +} + +struct mlx5_devcom_dev * +mlx5_devcom_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_dev *devc; + + mutex_lock(&dev_list_lock); + + if (devcom_dev_exists(dev)) { + devc = ERR_PTR(-EEXIST); + goto out; + } + + devc = mlx5_devcom_dev_alloc(dev); + if (!devc) { + devc = ERR_PTR(-ENOMEM); + goto out; + } + + list_add_tail(&devc->list, &devcom_dev_list); +out: + mutex_unlock(&dev_list_lock); + return devc; +} + +static void +mlx5_devcom_dev_release(struct kref *ref) +{ + struct mlx5_devcom_dev *devc = container_of(ref, struct mlx5_devcom_dev, ref); + + mutex_lock(&dev_list_lock); + list_del(&devc->list); + mutex_unlock(&dev_list_lock); + kfree(devc); +} + +void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc) +{ + if (!IS_ERR_OR_NULL(devc)) + kref_put(&devc->ref, mlx5_devcom_dev_release); +} + +static struct mlx5_devcom_comp * +mlx5_devcom_comp_alloc(u64 id, u64 key, mlx5_devcom_event_handler_t handler) +{ + struct mlx5_devcom_comp *comp; + + comp = kzalloc(sizeof(*comp), GFP_KERNEL); + if (!comp) + return ERR_PTR(-ENOMEM); + + comp->id = id; + comp->key = key; + comp->handler = handler; + init_rwsem(&comp->sem); + kref_init(&comp->ref); + INIT_LIST_HEAD(&comp->comp_dev_list_head); + + return comp; +} + +static void +mlx5_devcom_comp_release(struct kref *ref) +{ + struct mlx5_devcom_comp *comp = container_of(ref, struct mlx5_devcom_comp, ref); + + mutex_lock(&comp_list_lock); + list_del(&comp->comp_list); + mutex_unlock(&comp_list_lock); + kfree(comp); +} + +static struct mlx5_devcom_comp_dev * +devcom_alloc_comp_dev(struct mlx5_devcom_dev *devc, + struct mlx5_devcom_comp *comp, + void *data) +{ + struct mlx5_devcom_comp_dev *devcom; + + devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); + if (!devcom) + return ERR_PTR(-ENOMEM); + + kref_get(&devc->ref); + devcom->devc = devc; + devcom->comp = comp; + rcu_assign_pointer(devcom->data, data); + + down_write(&comp->sem); + list_add_tail(&devcom->list, &comp->comp_dev_list_head); + up_write(&comp->sem); + + return devcom; +} + +static void +devcom_free_comp_dev(struct mlx5_devcom_comp_dev *devcom) +{ + struct mlx5_devcom_comp *comp = devcom->comp; + + down_write(&comp->sem); + list_del(&devcom->list); + up_write(&comp->sem); + + kref_put(&devcom->devc->ref, mlx5_devcom_dev_release); + kfree(devcom); + kref_put(&comp->ref, mlx5_devcom_comp_release); +} + +static bool +devcom_component_equal(struct mlx5_devcom_comp *devcom, + enum mlx5_devcom_component id, + u64 key) +{ + return devcom->id == id && devcom->key == key; +} + +static struct mlx5_devcom_comp * +devcom_component_get(struct mlx5_devcom_dev *devc, + enum mlx5_devcom_component id, + u64 key, + mlx5_devcom_event_handler_t handler) +{ + struct mlx5_devcom_comp *comp; + + devcom_for_each_component(comp) { + if (devcom_component_equal(comp, id, key)) { + if (handler == comp->handler) { + kref_get(&comp->ref); + return comp; + } + + mlx5_core_err(devc->dev, + "Cannot register existing devcom component with different handler\n"); + return ERR_PTR(-EINVAL); + } + } + + return NULL; +} + +struct mlx5_devcom_comp_dev * +mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, + enum mlx5_devcom_component id, + u64 key, + mlx5_devcom_event_handler_t handler, + void *data) +{ + struct mlx5_devcom_comp_dev *devcom; + struct mlx5_devcom_comp *comp; + + if (IS_ERR_OR_NULL(devc)) + return NULL; + + mutex_lock(&comp_list_lock); + comp = devcom_component_get(devc, id, key, handler); + if (IS_ERR(comp)) { + devcom = ERR_PTR(-EINVAL); + goto out_unlock; + } + + if (!comp) { + comp = mlx5_devcom_comp_alloc(id, key, handler); + if (IS_ERR(comp)) { + devcom = ERR_CAST(comp); + goto out_unlock; + } + list_add_tail(&comp->comp_list, &devcom_comp_list); + } + mutex_unlock(&comp_list_lock); + + devcom = devcom_alloc_comp_dev(devc, comp, data); + if (IS_ERR(devcom)) + kref_put(&comp->ref, mlx5_devcom_comp_release); + + return devcom; + +out_unlock: + mutex_unlock(&comp_list_lock); + return devcom; +} + +void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom) +{ + if (!IS_ERR_OR_NULL(devcom)) + devcom_free_comp_dev(devcom); +} + +int mlx5_devcom_send_event(struct mlx5_devcom_comp_dev *devcom, + int event, int rollback_event, + void *event_data) +{ + struct mlx5_devcom_comp_dev *pos; + struct mlx5_devcom_comp *comp; + int err = 0; + void *data; + + if (IS_ERR_OR_NULL(devcom)) + return -ENODEV; + + comp = devcom->comp; + down_write(&comp->sem); + list_for_each_entry(pos, &comp->comp_dev_list_head, list) { + data = rcu_dereference_protected(pos->data, lockdep_is_held(&comp->sem)); + + if (pos != devcom && data) { + err = comp->handler(event, data, event_data); + if (err) + goto rollback; + } + } + + up_write(&comp->sem); + return 0; + +rollback: + if (list_entry_is_head(pos, &comp->comp_dev_list_head, list)) + goto out; + pos = list_prev_entry(pos, list); + list_for_each_entry_from_reverse(pos, &comp->comp_dev_list_head, list) { + data = rcu_dereference_protected(pos->data, lockdep_is_held(&comp->sem)); + + if (pos != devcom && data) + comp->handler(rollback_event, data, event_data); + } +out: + up_write(&comp->sem); + return err; +} + +void mlx5_devcom_comp_set_ready(struct mlx5_devcom_comp_dev *devcom, bool ready) +{ + WARN_ON(!rwsem_is_locked(&devcom->comp->sem)); + + WRITE_ONCE(devcom->comp->ready, ready); +} + +bool mlx5_devcom_comp_is_ready(struct mlx5_devcom_comp_dev *devcom) +{ + if (IS_ERR_OR_NULL(devcom)) + return false; + + return READ_ONCE(devcom->comp->ready); +} + +bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom_comp_dev *devcom) +{ + struct mlx5_devcom_comp *comp; + + if (IS_ERR_OR_NULL(devcom)) + return false; + + comp = devcom->comp; + down_read(&comp->sem); + if (!READ_ONCE(comp->ready)) { + up_read(&comp->sem); + return false; + } + + return true; +} + +void mlx5_devcom_for_each_peer_end(struct mlx5_devcom_comp_dev *devcom) +{ + up_read(&devcom->comp->sem); +} + +void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom_comp_dev *devcom, + struct mlx5_devcom_comp_dev **pos) +{ + struct mlx5_devcom_comp *comp = devcom->comp; + struct mlx5_devcom_comp_dev *tmp; + void *data; + + tmp = list_prepare_entry(*pos, &comp->comp_dev_list_head, list); + + list_for_each_entry_continue(tmp, &comp->comp_dev_list_head, list) { + if (tmp != devcom) { + data = rcu_dereference_protected(tmp->data, lockdep_is_held(&comp->sem)); + if (data) + break; + } + } + + if (list_entry_is_head(tmp, &comp->comp_dev_list_head, list)) + return NULL; + + *pos = tmp; + return data; +} + +void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom, + struct mlx5_devcom_comp_dev **pos) +{ + struct mlx5_devcom_comp *comp = devcom->comp; + struct mlx5_devcom_comp_dev *tmp; + void *data; + + tmp = list_prepare_entry(*pos, &comp->comp_dev_list_head, list); + + list_for_each_entry_continue(tmp, &comp->comp_dev_list_head, list) { + if (tmp != devcom) { + /* This can change concurrently, however 'data' pointer will remain + * valid for the duration of RCU read section. + */ + if (!READ_ONCE(comp->ready)) + return NULL; + data = rcu_dereference(tmp->data); + if (data) + break; + } + } + + if (list_entry_is_head(tmp, &comp->comp_dev_list_head, list)) + return NULL; + + *pos = tmp; + return data; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h new file mode 100644 index 0000000000..8389ac0af7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_DEVCOM_H__ +#define __LIB_MLX5_DEVCOM_H__ + +#include + +enum mlx5_devcom_component { + MLX5_DEVCOM_ESW_OFFLOADS, + MLX5_DEVCOM_NUM_COMPONENTS, +}; + +typedef int (*mlx5_devcom_event_handler_t)(int event, + void *my_data, + void *event_data); + +struct mlx5_devcom_dev *mlx5_devcom_register_device(struct mlx5_core_dev *dev); +void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc); + +struct mlx5_devcom_comp_dev * +mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, + enum mlx5_devcom_component id, + u64 key, + mlx5_devcom_event_handler_t handler, + void *data); +void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom); + +int mlx5_devcom_send_event(struct mlx5_devcom_comp_dev *devcom, + int event, int rollback_event, + void *event_data); + +void mlx5_devcom_comp_set_ready(struct mlx5_devcom_comp_dev *devcom, bool ready); +bool mlx5_devcom_comp_is_ready(struct mlx5_devcom_comp_dev *devcom); + +bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom_comp_dev *devcom); +void mlx5_devcom_for_each_peer_end(struct mlx5_devcom_comp_dev *devcom); +void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom_comp_dev *devcom, + struct mlx5_devcom_comp_dev **pos); + +#define mlx5_devcom_for_each_peer_entry(devcom, data, pos) \ + for (pos = NULL, data = mlx5_devcom_get_next_peer_data(devcom, &pos); \ + data; \ + data = mlx5_devcom_get_next_peer_data(devcom, &pos)) + +void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom, + struct mlx5_devcom_comp_dev **pos); + +#define mlx5_devcom_for_each_peer_entry_rcu(devcom, data, pos) \ + for (pos = NULL, data = mlx5_devcom_get_next_peer_data_rcu(devcom, &pos); \ + data; \ + data = mlx5_devcom_get_next_peer_data_rcu(devcom, &pos)) + +#endif /* __LIB_MLX5_DEVCOM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c new file mode 100644 index 0000000000..9482e51ac8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies + +#include +#include + +#include "mlx5_core.h" +#include "lib/mlx5.h" + +struct mlx5_dm { + /* protect access to icm bitmask */ + spinlock_t lock; + unsigned long *steering_sw_icm_alloc_blocks; + unsigned long *header_modify_sw_icm_alloc_blocks; + unsigned long *header_modify_pattern_sw_icm_alloc_blocks; +}; + +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) +{ + u64 header_modify_pattern_icm_blocks = 0; + u64 header_modify_icm_blocks = 0; + u64 steering_icm_blocks = 0; + struct mlx5_dm *dm; + bool support_v2; + + if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)) + return NULL; + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&dm->lock); + + if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) { + steering_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->steering_sw_icm_alloc_blocks = + bitmap_zalloc(steering_icm_blocks, GFP_KERNEL); + if (!dm->steering_sw_icm_alloc_blocks) + goto err_steering; + } + + if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) { + header_modify_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_sw_icm_alloc_blocks = + bitmap_zalloc(header_modify_icm_blocks, GFP_KERNEL); + if (!dm->header_modify_sw_icm_alloc_blocks) + goto err_modify_hdr; + } + + support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) && + MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) && + MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address); + + if (support_v2) { + header_modify_pattern_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_pattern_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_modify_pattern_sw_icm_alloc_blocks = + bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL); + if (!dm->header_modify_pattern_sw_icm_alloc_blocks) + goto err_pattern; + } + + return dm; + +err_pattern: + bitmap_free(dm->header_modify_sw_icm_alloc_blocks); + +err_modify_hdr: + bitmap_free(dm->steering_sw_icm_alloc_blocks); + +err_steering: + kfree(dm); + + return ERR_PTR(-ENOMEM); +} + +void mlx5_dm_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_dm *dm = dev->dm; + + if (!dev->dm) + return; + + if (dm->steering_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->steering_sw_icm_alloc_blocks); + } + + if (dm->header_modify_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->header_modify_sw_icm_alloc_blocks); + } + + if (dm->header_modify_pattern_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_header_modify_pattern_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->header_modify_pattern_sw_icm_alloc_blocks); + } + + kfree(dm); +} + +int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u32 log_alignment, u16 uid, + phys_addr_t *addr, u32 *obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u32 log_icm_size; + u64 align_mask; + u32 max_blocks; + u64 block_idx; + void *sw_icm; + int ret; + + if (!dev->dm) + return -EOPNOTSUPP; + + if (!length || (length & (length - 1)) || + length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1)) + return -EINVAL; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_pattern_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_pattern_sw_icm_size); + block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + if (!block_map) + return -EOPNOTSUPP; + + max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + if (log_alignment < MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) + log_alignment = MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + align_mask = BIT(log_alignment - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) - 1; + + spin_lock(&dm->lock); + block_idx = bitmap_find_next_zero_area(block_map, max_blocks, 0, + num_blocks, align_mask); + + if (block_idx < max_blocks) + bitmap_set(block_map, + block_idx, num_blocks); + + spin_unlock(&dm->lock); + + if (block_idx >= max_blocks) + return -ENOMEM; + + sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); + icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, + icm_start_addr); + MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&dm->lock); + bitmap_clear(block_map, + block_idx, num_blocks); + spin_unlock(&dm->lock); + + return ret; + } + + *addr = icm_start_addr; + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc); + +int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, + u64 length, u16 uid, phys_addr_t addr, u32 obj_id) +{ + u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + struct mlx5_dm *dm = dev->dm; + unsigned long *block_map; + u64 icm_start_addr; + u64 start_idx; + int err; + + if (!dev->dm) + return -EOPNOTSUPP; + + switch (type) { + case MLX5_SW_ICM_TYPE_STEERING: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + case MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_pattern_sw_icm_start_address); + block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + spin_lock(&dm->lock); + bitmap_clear(block_map, + start_idx, num_blocks); + spin_unlock(&dm->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h new file mode 100644 index 0000000000..69a7545977 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018-2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __LIB_MLX5_EQ_H__ +#define __LIB_MLX5_EQ_H__ +#include +#include +#include + +#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) + +struct mlx5_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + spinlock_t lock; /* lock completion tasklet list */ +}; + +struct mlx5_cq_table { + spinlock_t lock; /* protect radix tree */ + struct radix_tree_root tree; +}; + +struct mlx5_eq { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; + __be32 __iomem *doorbell; + u32 cons_index; + unsigned int vecidx; + unsigned int irqn; + u8 eqn; + struct mlx5_rsc_debug *dbg; + struct mlx5_irq *irq; +}; + +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; + spinlock_t lock; /* To avoid irq EQ handle races with resiliency flows */ +}; + +struct mlx5_eq_comp { + struct mlx5_eq core; + struct notifier_block irq_nb; + struct mlx5_eq_tasklet tasklet_ctx; + struct list_head list; +}; + +static inline u32 eq_get_size(struct mlx5_eq *eq) +{ + return eq->fbc.sz_m1 + 1; +} + +static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_frag_buf_get_wqe(&eq->fbc, entry); +} + +static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1); + + return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe; +} + +static inline void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +int mlx5_eq_table_init(struct mlx5_core_dev *dev); +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_eq_table_create(struct mlx5_core_dev *dev); +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); +void mlx5_cq_tasklet_cb(struct tasklet_struct *t); +struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); + +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); +#endif + +int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h new file mode 100644 index 0000000000..a0f7faea31 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __LIB_EVENTS_H__ +#define __LIB_EVENTS_H__ + +#include "mlx5_core.h" + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, + MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c new file mode 100644 index 0000000000..a80ecb672f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies. + +#include +#include +#include + +#include "lib/fs_chains.h" +#include "fs_ft_pool.h" +#include "en/mapping.h" +#include "fs_core.h" +#include "en_tc.h" + +#define chains_lock(chains) ((chains)->lock) +#define chains_ht(chains) ((chains)->chains_ht) +#define prios_ht(chains) ((chains)->prios_ht) +#define chains_default_ft(chains) ((chains)->chains_default_ft) +#define chains_end_ft(chains) ((chains)->chains_end_ft) +#define FT_TBL_SZ (64 * 1024) + +struct mlx5_fs_chains { + struct mlx5_core_dev *dev; + + struct rhashtable chains_ht; + struct rhashtable prios_ht; + /* Protects above chains_ht and prios_ht */ + struct mutex lock; + + struct mlx5_flow_table *chains_default_ft; + struct mlx5_flow_table *chains_end_ft; + struct mapping_ctx *chains_mapping; + + enum mlx5_flow_namespace_type ns; + u32 group_num; + u32 flags; + int fs_base_prio; + int fs_base_level; +}; + +struct fs_chain { + struct rhash_head node; + + u32 chain; + + int ref; + int id; + + struct mlx5_fs_chains *chains; + struct list_head prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; +}; + +struct prio_key { + u32 chain; + u32 prio; + u32 level; +}; + +struct prio { + struct rhash_head node; + struct list_head list; + + struct prio_key key; + + int ref; + + struct fs_chain *chain; + struct mlx5_flow_table *ft; + struct mlx5_flow_table *next_ft; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; +}; + +static const struct rhashtable_params chain_params = { + .head_offset = offsetof(struct fs_chain, node), + .key_offset = offsetof(struct fs_chain, chain), + .key_len = sizeof_field(struct fs_chain, chain), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params prio_params = { + .head_offset = offsetof(struct prio, node), + .key_offset = offsetof(struct prio, key), + .key_len = sizeof_field(struct prio, key), + .automatic_shrinking = true, +}; + +bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains) +{ + return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED; +} + +bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) +{ + return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; +} + +bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains) +{ + return mlx5_chains_prios_supported(chains) && + mlx5_chains_ignore_flow_level_supported(chains); +} + +u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains) +{ + if (!mlx5_chains_prios_supported(chains)) + return 1; + + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX - 1; + + /* We should get here only for eswitch case */ + return FDB_TC_MAX_CHAIN; +} + +u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) +{ + return mlx5_chains_get_chain_range(chains) + 1; +} + +u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) +{ + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + + if (!chains->dev->priv.eswitch || + chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS) + return 1; + + /* We should get here only for eswitch case */ + return FDB_TC_MAX_PRIO; +} + +static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains) +{ + if (mlx5_chains_ignore_flow_level_supported(chains)) + return UINT_MAX; + + /* Same value for FDB and NIC RX tables */ + return FDB_TC_LEVELS_PER_PRIO; +} + +void +mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + chains_end_ft(chains) = ft; +} + +static struct mlx5_flow_table * +mlx5_chains_create_table(struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int sz; + + if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED) + ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE; + ft_attr.max_fte = sz; + + /* We use chains_default_ft(chains) as the table's next_ft till + * ignore_flow_level is allowed on FT creation and not just for FTEs. + * Instead caller should add an explicit miss rule if needed. + */ + ft_attr.next_ft = chains_default_ft(chains); + + /* The root table(chain 0, prio 1, level 0) is required to be + * connected to the previous fs_core managed prio. + * We always create it, as a managed table, in order to align with + * fs_core logic. + */ + if (!mlx5_chains_ignore_flow_level_supported(chains) || + (chain == 0 && prio == 1 && level == 0)) { + ft_attr.level = chains->fs_base_level; + ft_attr.prio = chains->fs_base_prio + prio - 1; + ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ? + mlx5_get_fdb_sub_ns(chains->dev, chain) : + mlx5_get_flow_namespace(chains->dev, chains->ns); + } else { + ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = chains->fs_base_prio; + /* Firmware doesn't allow us to create another level 0 table, + * so we create all unmanaged tables as level 1 (base + 1). + * + * To connect them, we use explicit miss rules with + * ignore_flow_level. Caller is responsible to create + * these rules (if needed). + */ + ft_attr.level = chains->fs_base_level + 1; + ns = mlx5_get_flow_namespace(chains->dev, chains->ns); + } + + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = chains->group_num; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(ft), chain, prio, level, sz); + return ft; + } + + return ft; +} + +static int +create_chain_restore(struct fs_chain *chain) +{ + struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_fs_chains *chains = chain->chains; + enum mlx5e_tc_attr_to_reg mapped_obj_to_reg; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) || + !mlx5_chains_prios_supported(chains) || + !chains->chains_mapping) + return 0; + + err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. + */ + err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index); + mapping_remove(chains->chains_mapping, MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + chain->id = index; + + if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) { + mapped_obj_to_reg = MAPPED_OBJ_TO_REG; + chain->restore_rule = esw_add_restore_rule(esw, chain->id); + if (IS_ERR(chain->restore_rule)) { + err = PTR_ERR(chain->restore_rule); + goto err_rule; + } + } else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) { + /* For NIC RX we don't need a restore rule + * since we write the metadata to reg_b + * that is passed to SW directly. + */ + mapped_obj_to_reg = NIC_MAPPED_OBJ_TO_REG; + } else { + err = -EINVAL; + goto err_rule; + } + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].moffset); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen == 32 ? + 0 : mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen); + MLX5_SET(set_action_in, modact, data, chain->id); + mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + chain->miss_modify_hdr = mod_hdr; + + return 0; + +err_mod_hdr: + if (!IS_ERR_OR_NULL(chain->restore_rule)) + mlx5_del_flow_rules(chain->restore_rule); +err_rule: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(chains->chains_mapping, chain->id); + return err; +} + +static void destroy_chain_restore(struct fs_chain *chain) +{ + struct mlx5_fs_chains *chains = chain->chains; + + if (!chain->miss_modify_hdr) + return; + + if (chain->restore_rule) + mlx5_del_flow_rules(chain->restore_rule); + + mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr); + mapping_remove(chains->chains_mapping, chain->id); +} + +static struct fs_chain * +mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain) +{ + struct fs_chain *chain_s = NULL; + int err; + + chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL); + if (!chain_s) + return ERR_PTR(-ENOMEM); + + chain_s->chains = chains; + chain_s->chain = chain; + INIT_LIST_HEAD(&chain_s->prios_list); + + err = create_chain_restore(chain_s); + if (err) + goto err_restore; + + err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node, + chain_params); + if (err) + goto err_insert; + + return chain_s; + +err_insert: + destroy_chain_restore(chain_s); +err_restore: + kvfree(chain_s); + return ERR_PTR(err); +} + +static void +mlx5_chains_destroy_chain(struct fs_chain *chain) +{ + struct mlx5_fs_chains *chains = chain->chains; + + rhashtable_remove_fast(&chains_ht(chains), &chain->node, + chain_params); + + destroy_chain_restore(chain); + kvfree(chain); +} + +static struct fs_chain * +mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain) +{ + struct fs_chain *chain_s; + + chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain, + chain_params); + if (!chain_s) { + chain_s = mlx5_chains_create_chain(chains, chain); + if (IS_ERR(chain_s)) + return chain_s; + } + + chain_s->ref++; + + return chain_s; +} + +static struct mlx5_flow_handle * +mlx5_chains_add_miss_rule(struct fs_chain *chain, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5_fs_chains *chains = chain->chains; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act act = {}; + + act.flags = FLOW_ACT_NO_APPEND; + if (mlx5_chains_ignore_flow_level_supported(chain->chains)) + act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = next_ft; + + if (chains->chains_mapping && next_ft == chains_end_ft(chains) && + chain->chain != mlx5_chains_get_nf_ft_chain(chains) && + mlx5_chains_prios_supported(chains)) { + act.modify_hdr = chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1); +} + +static int +mlx5_chains_update_prio_prevs(struct prio *prio, + struct mlx5_flow_table *next_ft) +{ + struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; + struct fs_chain *chain = prio->chain; + struct prio *pos; + int n = 0, err; + + if (prio->key.level) + return 0; + + /* Iterate in reverse order until reaching the level 0 rule of + * the previous priority, adding all the miss rules first, so we can + * revert them if any of them fails. + */ + pos = prio; + list_for_each_entry_continue_reverse(pos, + &chain->prios_list, + list) { + miss_rules[n] = mlx5_chains_add_miss_rule(chain, + pos->ft, + next_ft); + if (IS_ERR(miss_rules[n])) { + err = PTR_ERR(miss_rules[n]); + goto err_prev_rule; + } + + n++; + if (!pos->key.level) + break; + } + + /* Success, delete old miss rules, and update the pointers. */ + n = 0; + pos = prio; + list_for_each_entry_continue_reverse(pos, + &chain->prios_list, + list) { + mlx5_del_flow_rules(pos->miss_rule); + + pos->miss_rule = miss_rules[n]; + pos->next_ft = next_ft; + + n++; + if (!pos->key.level) + break; + } + + return 0; + +err_prev_rule: + while (--n >= 0) + mlx5_del_flow_rules(miss_rules[n]); + + return err; +} + +static void +mlx5_chains_put_chain(struct fs_chain *chain) +{ + if (--chain->ref == 0) + mlx5_chains_destroy_chain(chain); +} + +static struct prio * +mlx5_chains_create_prio(struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_handle *miss_rule; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *ft; + struct fs_chain *chain_s; + struct list_head *pos; + struct prio *prio_s; + u32 *flow_group_in; + int err; + + chain_s = mlx5_chains_get_chain(chains, chain); + if (IS_ERR(chain_s)) + return ERR_CAST(chain_s); + + prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!prio_s || !flow_group_in) { + err = -ENOMEM; + goto err_alloc; + } + + /* Chain's prio list is sorted by prio and level. + * And all levels of some prio point to the next prio's level 0. + * Example list (prio, level): + * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) + * In hardware, we will we have the following pointers: + * (3,0) -> (5,0) -> (7,0) -> Slow path + * (3,1) -> (5,0) + * (5,1) -> (7,0) + * (6,1) -> (7,0) + */ + + /* Default miss for each chain: */ + next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? + chains_default_ft(chains) : + chains_end_ft(chains); + list_for_each(pos, &chain_s->prios_list) { + struct prio *p = list_entry(pos, struct prio, list); + + /* exit on first pos that is larger */ + if (prio < p->key.prio || (prio == p->key.prio && + level < p->key.level)) { + /* Get next level 0 table */ + next_ft = p->key.level == 0 ? p->ft : p->next_ft; + break; + } + } + + ft = mlx5_chains_create_table(chains, chain, prio, level); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_create; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + ft->max_fte - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft->max_fte - 1); + miss_group = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + goto err_group; + } + + /* Add miss rule to next_ft */ + miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft); + if (IS_ERR(miss_rule)) { + err = PTR_ERR(miss_rule); + goto err_miss_rule; + } + + prio_s->miss_group = miss_group; + prio_s->miss_rule = miss_rule; + prio_s->next_ft = next_ft; + prio_s->chain = chain_s; + prio_s->key.chain = chain; + prio_s->key.prio = prio; + prio_s->key.level = level; + prio_s->ft = ft; + + err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node, + prio_params); + if (err) + goto err_insert; + + list_add(&prio_s->list, pos->prev); + + /* Table is ready, connect it */ + err = mlx5_chains_update_prio_prevs(prio_s, ft); + if (err) + goto err_update; + + kvfree(flow_group_in); + return prio_s; + +err_update: + list_del(&prio_s->list); + rhashtable_remove_fast(&prios_ht(chains), &prio_s->node, + prio_params); +err_insert: + mlx5_del_flow_rules(miss_rule); +err_miss_rule: + mlx5_destroy_flow_group(miss_group); +err_group: + mlx5_destroy_flow_table(ft); +err_create: +err_alloc: + kvfree(prio_s); + kvfree(flow_group_in); + mlx5_chains_put_chain(chain_s); + return ERR_PTR(err); +} + +static void +mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains, + struct prio *prio) +{ + struct fs_chain *chain = prio->chain; + + WARN_ON(mlx5_chains_update_prio_prevs(prio, + prio->next_ft)); + + list_del(&prio->list); + rhashtable_remove_fast(&prios_ht(chains), &prio->node, + prio_params); + mlx5_del_flow_rules(prio->miss_rule); + mlx5_destroy_flow_group(prio->miss_group); + mlx5_destroy_flow_table(prio->ft); + mlx5_chains_put_chain(chain); + kvfree(prio); +} + +struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) +{ + struct mlx5_flow_table *prev_fts; + struct prio *prio_s; + struct prio_key key; + int l = 0; + + if ((chain > mlx5_chains_get_chain_range(chains) && + chain != mlx5_chains_get_nf_ft_chain(chains)) || + prio > mlx5_chains_get_prio_range(chains) || + level > mlx5_chains_get_level_range(chains)) + return ERR_PTR(-EOPNOTSUPP); + + /* create earlier levels for correct fs_core lookup when + * connecting tables. + */ + for (l = 0; l < level; l++) { + prev_fts = mlx5_chains_get_table(chains, chain, prio, l); + if (IS_ERR(prev_fts)) { + prio_s = ERR_CAST(prev_fts); + goto err_get_prevs; + } + } + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&chains_lock(chains)); + prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key, + prio_params); + if (!prio_s) { + prio_s = mlx5_chains_create_prio(chains, chain, + prio, level); + if (IS_ERR(prio_s)) + goto err_create_prio; + } + + ++prio_s->ref; + mutex_unlock(&chains_lock(chains)); + + return prio_s->ft; + +err_create_prio: + mutex_unlock(&chains_lock(chains)); +err_get_prevs: + while (--l >= 0) + mlx5_chains_put_table(chains, chain, prio, l); + return ERR_CAST(prio_s); +} + +void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) +{ + struct prio *prio_s; + struct prio_key key; + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&chains_lock(chains)); + prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key, + prio_params); + if (!prio_s) + goto err_get_prio; + + if (--prio_s->ref == 0) + mlx5_chains_destroy_prio(chains, prio_s); + mutex_unlock(&chains_lock(chains)); + + while (level-- > 0) + mlx5_chains_put_table(chains, chain, prio, level); + + return; + +err_get_prio: + mutex_unlock(&chains_lock(chains)); + WARN_ONCE(1, + "Couldn't find table: (chain: %d prio: %d level: %d)", + chain, prio, level); +} + +struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) +{ + return chains_end_ft(chains); +} + +struct mlx5_flow_table * +mlx5_chains_create_global_table(struct mlx5_fs_chains *chains) +{ + u32 chain, prio, level; + int err; + + if (!mlx5_chains_ignore_flow_level_supported(chains)) { + err = -EOPNOTSUPP; + + mlx5_core_warn(chains->dev, + "Couldn't create global flow table, ignore_flow_level not supported."); + goto err_ignore; + } + + chain = mlx5_chains_get_chain_range(chains), + prio = mlx5_chains_get_prio_range(chains); + level = mlx5_chains_get_level_range(chains); + + return mlx5_chains_create_table(chains, chain, prio, level); + +err_ignore: + return ERR_PTR(err); +} + +void +mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft) +{ + mlx5_destroy_flow_table(ft); +} + +static struct mlx5_fs_chains * +mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ + struct mlx5_fs_chains *chains; + int err; + + chains = kzalloc(sizeof(*chains), GFP_KERNEL); + if (!chains) + return ERR_PTR(-ENOMEM); + + chains->dev = dev; + chains->flags = attr->flags; + chains->ns = attr->ns; + chains->group_num = attr->max_grp_num; + chains->chains_mapping = attr->mapping; + chains->fs_base_prio = attr->fs_base_prio; + chains->fs_base_level = attr->fs_base_level; + chains_default_ft(chains) = chains_end_ft(chains) = attr->default_ft; + + err = rhashtable_init(&chains_ht(chains), &chain_params); + if (err) + goto init_chains_ht_err; + + err = rhashtable_init(&prios_ht(chains), &prio_params); + if (err) + goto init_prios_ht_err; + + mutex_init(&chains_lock(chains)); + + return chains; + +init_prios_ht_err: + rhashtable_destroy(&chains_ht(chains)); +init_chains_ht_err: + kfree(chains); + return ERR_PTR(err); +} + +static void +mlx5_chains_cleanup(struct mlx5_fs_chains *chains) +{ + mutex_destroy(&chains_lock(chains)); + rhashtable_destroy(&prios_ht(chains)); + rhashtable_destroy(&chains_ht(chains)); + + kfree(chains); +} + +struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ + struct mlx5_fs_chains *chains; + + chains = mlx5_chains_init(dev, attr); + + return chains; +} + +void +mlx5_chains_destroy(struct mlx5_fs_chains *chains) +{ + mlx5_chains_cleanup(chains); +} + +int +mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, + u32 *chain_mapping) +{ + struct mapping_ctx *ctx = chains->chains_mapping; + struct mlx5_mapped_obj mapped_obj = {}; + + mapped_obj.type = MLX5_MAPPED_OBJ_CHAIN; + mapped_obj.chain = chain; + return mapping_add(ctx, &mapped_obj, chain_mapping); +} + +int +mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping) +{ + struct mapping_ctx *ctx = chains->chains_mapping; + + return mapping_remove(ctx, chain_mapping); +} + +void +mlx5_chains_print_info(struct mlx5_fs_chains *chains) +{ + mlx5_core_dbg(chains->dev, "Flow table chains groups(%d)\n", chains->group_num); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h new file mode 100644 index 0000000000..8972fe0572 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __ML5_ESW_CHAINS_H__ +#define __ML5_ESW_CHAINS_H__ + +#include + +struct mlx5_fs_chains; +struct mlx5_mapped_obj; + +enum mlx5_chains_flags { + MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), + MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED = BIT(1), + MLX5_CHAINS_FT_TUNNEL_SUPPORTED = BIT(2), +}; + +struct mlx5_chains_attr { + enum mlx5_flow_namespace_type ns; + int fs_base_prio; + int fs_base_level; + u32 flags; + u32 max_grp_num; + struct mlx5_flow_table *default_ft; + struct mapping_ctx *mapping; +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +bool +mlx5_chains_prios_supported(struct mlx5_fs_chains *chains); +bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains); +bool +mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains); +u32 +mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains); + +struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level); +void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level); + +struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains); + +struct mlx5_flow_table * +mlx5_chains_create_global_table(struct mlx5_fs_chains *chains); +void +mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft); + +int +mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, + u32 *chain_mapping); +int +mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, + u32 chain_mapping); + +struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr); +void mlx5_chains_destroy(struct mlx5_fs_chains *chains); + +void +mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, + struct mlx5_flow_table *ft); +void +mlx5_chains_print_info(struct mlx5_fs_chains *chains); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline bool +mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) +{ return false; } + +static inline struct mlx5_flow_table * +mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) { return ERR_PTR(-EOPNOTSUPP); } +static inline void +mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, + u32 level) {}; + +static inline struct mlx5_flow_table * +mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains) { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct mlx5_fs_chains * +mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) +{ return NULL; } +static inline void +mlx5_chains_destroy(struct mlx5_fs_chains *chains) {} +static inline void +mlx5_chains_print_info(struct mlx5_fs_chains *chains) {} + +#endif /* CONFIG_MLX5_CLS_ACT */ + +#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c new file mode 100644 index 0000000000..b78f2ba25c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -0,0 +1,608 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "lib/fs_ttc.h" + +#define MLX5_TTC_NUM_GROUPS 3 +#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT) +#define MLX5_TTC_GROUP2_SIZE BIT(1) +#define MLX5_TTC_GROUP3_SIZE BIT(0) +#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\ + MLX5_TTC_GROUP2_SIZE +\ + MLX5_TTC_GROUP3_SIZE) + +#define MLX5_INNER_TTC_NUM_GROUPS 3 +#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\ + MLX5_INNER_TTC_GROUP2_SIZE +\ + MLX5_INNER_TTC_GROUP3_SIZE) + +/* L3/L4 traffic type classifier */ +struct mlx5_ttc_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; + struct mlx5_ttc_rule rules[MLX5_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc) +{ + return ttc->t; +} + +static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) { + mlx5_del_flow_rules(ttc->rules[i].rule); + ttc->rules[i].rule = NULL; + } + } + + for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } +} + +struct mlx5_etype_proto { + u16 etype; + u8 proto; +}; + +static struct mlx5_etype_proto ttc_rules[] = { + [MLX5_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, + }, + [MLX5_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5_etype_proto ttc_tunnel_rules[] = { + [MLX5_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV4_IPIP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV6_IPIP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV4_IPV6] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPV6, + }, + [MLX5_TT_IPV6_IPV6] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPV6, + }, + +}; + +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt) +{ + return ttc_tunnel_rules[tt].proto; +} + +static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, + u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || + MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx)); + default: + return false; + } +} + +static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev) +{ + int tt; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5_tunnel_proto_supported_rx(mdev, + ttc_tunnel_rules[tt].proto)) + return true; + } + return false; +} + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5_tunnel_any_rx_proto_supported(mdev) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version)); +} + +static u8 mlx5_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + +static struct mlx5_flow_handle * +mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, u16 etype, u8 proto) +{ + int match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); + } + + ipv = mlx5_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_flow_handle **trules; + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int tt; + int err; + + ft = ttc->t; + rules = ttc->rules; + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + if (test_bit(tt, params->ignore_dests)) + continue; + rule->rule = mlx5_generate_ttc_rule(dev, ft, ¶ms->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev)) + return 0; + + trules = ttc->tunnel_rules; + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (!mlx5_tunnel_proto_supported_rx(dev, + ttc_tunnel_rules[tt].proto)) + continue; + if (test_bit(tt, params->ignore_tunnel_dests)) + continue; + trules[tt] = mlx5_generate_ttc_rule(dev, ft, + ¶ms->tunnel_dests[tt], + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(trules[tt])) { + err = PTR_ERR(trules[tt]); + trules[tt] = NULL; + goto del_rules; + } + } + + return 0; + +del_rules: + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, + bool use_ipv) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_handle * +mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int err; + int tt; + + ft = ttc->t; + rules = ttc->rules; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + if (test_bit(tt, params->ignore_dests)) + continue; + rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, + ¶ms->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + return 0; + +del_rules: + + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g), + GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_inner_ttc_table_groups(ttc); + if (err) + goto destroy_ft; + + err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc) +{ + int i; + + mlx5_cleanup_ttc_rules(ttc); + for (i = ttc->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ttc->g[i])) + mlx5_destroy_flow_group(ttc->g[i]); + ttc->g[i] = NULL; + } + + kfree(ttc->g); + mlx5_destroy_flow_table(ttc->t); + kvfree(ttc); +} + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + bool match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer); + if (err) + goto destroy_ft; + + err = mlx5_generate_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest) +{ + return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest, + NULL); +} + +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest; + + WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR, + "TTC[%d] default dest is not setup yet", type); + + return *dest; +} + +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type); + + return mlx5_ttc_fwd_dest(ttc, type, &dest); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h new file mode 100644 index 0000000000..85fef0cd1c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __ML5_FS_TTC_H__ +#define __ML5_FS_TTC_H__ + +#include + +enum mlx5_traffic_types { + MLX5_TT_IPV4_TCP, + MLX5_TT_IPV6_TCP, + MLX5_TT_IPV4_UDP, + MLX5_TT_IPV6_UDP, + MLX5_TT_IPV4_IPSEC_AH, + MLX5_TT_IPV6_IPSEC_AH, + MLX5_TT_IPV4_IPSEC_ESP, + MLX5_TT_IPV6_IPSEC_ESP, + MLX5_TT_IPV4, + MLX5_TT_IPV6, + MLX5_TT_ANY, + MLX5_NUM_TT, + MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY, +}; + +enum mlx5_tunnel_types { + MLX5_TT_IPV4_GRE, + MLX5_TT_IPV6_GRE, + MLX5_TT_IPV4_IPIP, + MLX5_TT_IPV6_IPIP, + MLX5_TT_IPV4_IPV6, + MLX5_TT_IPV6_IPV6, + MLX5_NUM_TUNNEL_TT, +}; + +struct mlx5_ttc_rule { + struct mlx5_flow_handle *rule; + struct mlx5_flow_destination default_dest; +}; + +struct mlx5_ttc_table; + +struct ttc_params { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table_attr ft_attr; + struct mlx5_flow_destination dests[MLX5_NUM_TT]; + DECLARE_BITMAP(ignore_dests, MLX5_NUM_TT); + bool inner_ttc; + DECLARE_BITMAP(ignore_tunnel_dests, MLX5_NUM_TUNNEL_TT); + struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest); +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt); + +#endif /* __MLX5_FS_TTC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c new file mode 100644 index 0000000000..6dc83e871c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include "mlx5_core.h" +#include "geneve.h" + +struct mlx5_geneve { + struct mlx5_core_dev *mdev; + __be16 opt_class; + u8 opt_type; + u32 obj_id; + struct mutex sync_lock; /* protect GENEVE obj operations */ + u32 refcount; +}; + +static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev, + __be16 class, + u8 type, + u8 len) +{ + u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u64 general_obj_types; + void *hdr, *opt; + u16 obj_id; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT)) + return -EINVAL; + + hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr); + opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + + MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class)); + MLX5_SET(geneve_tlv_option, opt, option_type, type); + MLX5_SET(geneve_tlv_option, opt, option_data_length, len); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return obj_id; +} + +static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) +{ + int res = 0; + + if (IS_ERR_OR_NULL(geneve)) + return -EOPNOTSUPP; + + mutex_lock(&geneve->sync_lock); + + if (geneve->refcount) { + if (geneve->opt_class == opt->opt_class && + geneve->opt_type == opt->type) { + /* We already have TLV options obj allocated */ + geneve->refcount++; + } else { + /* TLV options obj allocated, but its params + * do not match the new request. + * We support only one such object. + */ + mlx5_core_warn(geneve->mdev, + "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n", + be16_to_cpu(opt->opt_class), + opt->type, + opt->length); + res = -EOPNOTSUPP; + goto unlock; + } + } else { + /* We don't have any TLV options obj allocated */ + + res = mlx5_geneve_tlv_option_create(geneve->mdev, + opt->opt_class, + opt->type, + opt->length); + if (res < 0) { + mlx5_core_warn(geneve->mdev, + "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n", + be16_to_cpu(opt->opt_class), + opt->type, opt->length, res); + goto unlock; + } + geneve->opt_class = opt->opt_class; + geneve->opt_type = opt->type; + geneve->obj_id = res; + geneve->refcount++; + res = 0; + } + +unlock: + mutex_unlock(&geneve->sync_lock); + return res; +} + +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + mutex_lock(&geneve->sync_lock); + if (--geneve->refcount == 0) { + /* We've just removed the last user of Geneve option. + * Now delete the object in FW. + */ + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + geneve->opt_class = 0; + geneve->opt_type = 0; + geneve->obj_id = 0; + } + mutex_unlock(&geneve->sync_lock); +} + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_geneve *geneve = + kzalloc(sizeof(*geneve), GFP_KERNEL); + + if (!geneve) + return ERR_PTR(-ENOMEM); + geneve->mdev = mdev; + mutex_init(&geneve->sync_lock); + + return geneve; +} + +void mlx5_geneve_destroy(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + /* Lockless since we are unloading */ + if (geneve->refcount) + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + kfree(geneve); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h new file mode 100644 index 0000000000..adee0cbba1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_GENEVE_H__ +#define __MLX5_GENEVE_H__ + +#include +#include + +struct mlx5_geneve; + +#ifdef CONFIG_MLX5_ESWITCH + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev); +void mlx5_geneve_destroy(struct mlx5_geneve *geneve); + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt); +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline struct mlx5_geneve +*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; } +static inline void +mlx5_geneve_destroy(struct mlx5_geneve *geneve) {} +static inline int +mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; } +static inline void +mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_GENEVE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c new file mode 100644 index 0000000000..96ffc0a0e6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" +#include "lib/mlx5.h" + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev) +{ + unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size); + + ida_init(&dev->roce.reserved_gids.ida); + dev->roce.reserved_gids.start = tblsz; + dev->roce.reserved_gids.count = 0; +} + +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev) +{ + WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida)); + dev->roce.reserved_gids.start = 0; + dev->roce.reserved_gids.count = 0; + ida_destroy(&dev->roce.reserved_gids.ida); +} + +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + if (dev->roce.reserved_gids.start < count) { + mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n", + count); + return -ENOMEM; + } + if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) { + mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count); + return -ENOMEM; + } + + dev->roce.reserved_gids.start -= count; + dev->roce.reserved_gids.count += count; + mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); + return 0; +} + +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count) +{ + WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved", + count, dev->roce.reserved_gids.count); + + dev->roce.reserved_gids.start += count; + dev->roce.reserved_gids.count -= count; + mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n", + dev->roce.reserved_gids.count, + dev->roce.reserved_gids.start); +} + +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index) +{ + int end = dev->roce.reserved_gids.start + + dev->roce.reserved_gids.count - 1; + int index = 0; + + index = ida_alloc_range(&dev->roce.reserved_gids.ida, + dev->roce.reserved_gids.start, end, + GFP_KERNEL); + if (index < 0) + return index; + + mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index); + *gid_index = index; + return 0; +} + +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index) +{ + mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index); + ida_free(&dev->roce.reserved_gids.ida, gid_index); +} + +unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev) +{ + return dev->roce.reserved_gids.count; +} +EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count); + +int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, + u8 roce_version, u8 roce_l3_type, const u8 *gid, + const u8 *mac, bool vlan, u16 vlan_id, u8 port_num) +{ +#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {}; + void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); + char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_l3_address); + void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr, + source_mac_47_32); + int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address); + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EINVAL; + + if (gid) { + if (vlan) { + MLX5_SET_RA(in_addr, vlan_valid, 1); + MLX5_SET_RA(in_addr, vlan_id, vlan_id); + } + + ether_addr_copy(addr_mac, mac); + memcpy(addr_l3_addr, gid, gidsz); + } + MLX5_SET_RA(in_addr, roce_version, roce_version); + MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type); + + if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0) + MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num); + + MLX5_SET(set_roce_address_in, in, roce_address_index, index); + MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); + return mlx5_cmd_exec_in(dev, set_roce_address, in); +} +EXPORT_SYMBOL(mlx5_core_roce_gid_set); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c new file mode 100644 index 0000000000..583dc7e2ac --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include +#include "mlx5_core.h" +#include "lib/hv.h" + +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len, + int offset, bool read) +{ + int rc = -EOPNOTSUPP; + int bytes_returned; + int block_id; + + if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len != HV_CONFIG_BLOCK_SIZE_MAX) + return -EINVAL; + + block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX; + + rc = read ? + hyperv_read_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id, + &bytes_returned) : + hyperv_write_cfg_blk(dev->pdev, buf, + HV_CONFIG_BLOCK_SIZE_MAX, block_id); + + /* Make sure len bytes were read successfully */ + if (read && !rc && len != bytes_returned) + rc = -EIO; + + if (rc) { + mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n", + read ? "read" : "write", rc, len, + offset); + return rc; + } + + return 0; +} + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, true); +} + +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset) +{ + return mlx5_hv_config_common(dev, buf, len, offset, false); +} + +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)) +{ + return hyperv_reg_block_invalidate(dev->pdev, context, + block_invalidate); +} + +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev) +{ + hyperv_reg_block_invalidate(dev->pdev, NULL, NULL); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h new file mode 100644 index 0000000000..f9a45573f4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __LIB_HV_H__ +#define __LIB_HV_H__ + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +#include +#include + +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len, + int offset); +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context, + void (*block_invalidate)(void *context, + u64 block_mask)); +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev); +#endif + +#endif /* __LIB_HV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c new file mode 100644 index 0000000000..30564d9b00 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include +#include "mlx5_core.h" +#include "lib/hv.h" +#include "lib/hv_vhca.h" + +struct mlx5_hv_vhca { + struct mlx5_core_dev *dev; + struct workqueue_struct *work_queue; + struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX]; + struct mutex agents_lock; /* Protect agents array */ +}; + +struct mlx5_hv_vhca_work { + struct work_struct invalidate_work; + struct mlx5_hv_vhca *hv_vhca; + u64 block_mask; +}; + +struct mlx5_hv_vhca_data_block { + u16 sequence; + u16 offset; + u8 reserved[4]; + u64 data[15]; +}; + +struct mlx5_hv_vhca_agent { + enum mlx5_hv_vhca_agent_type type; + struct mlx5_hv_vhca *hv_vhca; + void *priv; + u16 seq; + void (*control)(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_control_block *block); + void (*invalidate)(struct mlx5_hv_vhca_agent *agent, + u64 block_mask); + void (*cleanup)(struct mlx5_hv_vhca_agent *agent); +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + struct mlx5_hv_vhca *hv_vhca; + + hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL); + if (!hv_vhca) + return ERR_PTR(-ENOMEM); + + hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca"); + if (!hv_vhca->work_queue) { + kfree(hv_vhca); + return ERR_PTR(-ENOMEM); + } + + hv_vhca->dev = dev; + mutex_init(&hv_vhca->agents_lock); + + return hv_vhca; +} + +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + destroy_workqueue(hv_vhca->work_queue); + kfree(hv_vhca); +} + +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work) +{ + struct mlx5_hv_vhca_work *hwork; + struct mlx5_hv_vhca *hv_vhca; + int i; + + hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work); + hv_vhca = hwork->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->invalidate) + continue; + + if (!(BIT(agent->type) & hwork->block_mask)) + continue; + + agent->invalidate(agent, hwork->block_mask); + } + mutex_unlock(&hv_vhca->agents_lock); + + kfree(hwork); +} + +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context; + struct mlx5_hv_vhca_work *work; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work); + work->hv_vhca = hv_vhca; + work->block_mask = block_mask; + + queue_work(hv_vhca->work_queue, &work->invalidate_work); +} + +#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */) + +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca, + struct mlx5_hv_vhca_control_block *block) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (!agent || !agent->control) + continue; + + if (!(AGENT_MASK(agent->type) & block->control)) + continue; + + agent->control(agent, block); + } +} + +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca, + u32 *capabilities) +{ + int i; + + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) { + struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i]; + + if (agent) + *capabilities |= AGENT_MASK(agent->type); + } +} + +static void +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent, + u64 block_mask) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + struct mlx5_core_dev *dev = hv_vhca->dev; + struct mlx5_hv_vhca_control_block *block; + u32 capabilities = 0; + int err; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return; + + err = mlx5_hv_read_config(dev, block, sizeof(*block), 0); + if (err) + goto free_block; + + mlx5_hv_vhca_capabilities(hv_vhca, &capabilities); + + /* In case no capabilities, send empty block in return */ + if (!capabilities) { + memset(block, 0, sizeof(*block)); + goto write; + } + + if (block->capabilities != capabilities) + block->capabilities = capabilities; + + if (block->control & ~capabilities) + goto free_block; + + mlx5_hv_vhca_agents_control(hv_vhca, block); + block->command_ack = block->command; + +write: + mlx5_hv_write_config(dev, block, sizeof(*block), 0); + +free_block: + kfree(block); +} + +static struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca) +{ + return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL, + NULL, + mlx5_hv_vhca_control_agent_invalidate, + NULL, NULL); +} + +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + mlx5_hv_vhca_agent_destroy(agent); +} + +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int err; + + if (IS_ERR_OR_NULL(hv_vhca)) + return IS_ERR_OR_NULL(hv_vhca); + + err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca, + mlx5_hv_vhca_invalidate); + if (err) + return err; + + agent = mlx5_hv_vhca_control_agent_create(hv_vhca); + if (IS_ERR_OR_NULL(agent)) { + mlx5_hv_unregister_invalidate(hv_vhca->dev); + return IS_ERR_OR_NULL(agent); + } + + hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent; + + return 0; +} + +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ + struct mlx5_hv_vhca_agent *agent; + int i; + + if (IS_ERR_OR_NULL(hv_vhca)) + return; + + agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL]; + if (agent) + mlx5_hv_vhca_control_agent_destroy(agent); + + mutex_lock(&hv_vhca->agents_lock); + for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) + WARN_ON(hv_vhca->agents[i]); + + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_unregister_invalidate(hv_vhca->dev); +} + +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca) +{ + mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL)); +} + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleaup)(struct mlx5_hv_vhca_agent *agent), + void *priv) +{ + struct mlx5_hv_vhca_agent *agent; + + if (IS_ERR_OR_NULL(hv_vhca)) + return ERR_PTR(-ENOMEM); + + if (type >= MLX5_HV_VHCA_AGENT_MAX) + return ERR_PTR(-EINVAL); + + mutex_lock(&hv_vhca->agents_lock); + if (hv_vhca->agents[type]) { + mutex_unlock(&hv_vhca->agents_lock); + return ERR_PTR(-EINVAL); + } + mutex_unlock(&hv_vhca->agents_lock); + + agent = kzalloc(sizeof(*agent), GFP_KERNEL); + if (!agent) + return ERR_PTR(-ENOMEM); + + agent->type = type; + agent->hv_vhca = hv_vhca; + agent->priv = priv; + agent->control = control; + agent->invalidate = invalidate; + agent->cleanup = cleaup; + + mutex_lock(&hv_vhca->agents_lock); + hv_vhca->agents[type] = agent; + mutex_unlock(&hv_vhca->agents_lock); + + mlx5_hv_vhca_agents_update(hv_vhca); + + return agent; +} + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ + struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca; + + mutex_lock(&hv_vhca->agents_lock); + + if (WARN_ON(agent != hv_vhca->agents[agent->type])) { + mutex_unlock(&hv_vhca->agents_lock); + return; + } + + hv_vhca->agents[agent->type] = NULL; + mutex_unlock(&hv_vhca->agents_lock); + + if (agent->cleanup) + agent->cleanup(agent); + + kfree(agent); + + mlx5_hv_vhca_agents_update(hv_vhca); +} + +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent, + struct mlx5_hv_vhca_data_block *data_block, + void *src, int len, int *offset) +{ + int bytes = min_t(int, (int)sizeof(data_block->data), len); + + data_block->sequence = agent->seq; + data_block->offset = (*offset)++; + memcpy(data_block->data, src, bytes); + + return bytes; +} + +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent) +{ + agent->seq++; +} + +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len) +{ + int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX; + int block_offset = 0; + int total = 0; + int err; + + while (len) { + struct mlx5_hv_vhca_data_block data_block = {0}; + int bytes; + + bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block, + buf + total, + len, &block_offset); + if (!bytes) + return -ENOMEM; + + err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block, + sizeof(data_block), offset); + if (err) + return err; + + total += bytes; + len -= bytes; + } + + mlx5_hv_vhca_agent_seq_update(agent); + + return 0; +} + +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent) +{ + return agent->priv; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h new file mode 100644 index 0000000000..f240ffe511 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __LIB_HV_VHCA_H__ +#define __LIB_HV_VHCA_H__ + +#include "en.h" +#include "lib/hv.h" + +struct mlx5_hv_vhca_agent; +struct mlx5_hv_vhca; +struct mlx5_hv_vhca_control_block; + +enum mlx5_hv_vhca_agent_type { + MLX5_HV_VHCA_AGENT_CONTROL = 0, + MLX5_HV_VHCA_AGENT_STATS = 1, + MLX5_HV_VHCA_AGENT_MAX = 32, +}; + +#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) + +struct mlx5_hv_vhca_control_block { + u32 capabilities; + u32 control; + u16 command; + u16 command_ack; + u16 version; + u16 rings; + u32 reserved1[28]; +}; + +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev); +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca); +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca); +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask); + +struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context); + +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent); +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent, + void *buf, int len); +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent); + +#else + +static inline struct mlx5_hv_vhca * +mlx5_hv_vhca_create(struct mlx5_core_dev *dev) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca) +{ + return 0; +} + +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca) +{ +} + +static inline void mlx5_hv_vhca_invalidate(void *context, + u64 block_mask) +{ +} + +static inline struct mlx5_hv_vhca_agent * +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca, + enum mlx5_hv_vhca_agent_type type, + void (*control)(struct mlx5_hv_vhca_agent*, + struct mlx5_hv_vhca_control_block *block), + void (*invalidate)(struct mlx5_hv_vhca_agent*, + u64 block_mask), + void (*cleanup)(struct mlx5_hv_vhca_agent *agent), + void *context) +{ + return NULL; +} + +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent) +{ +} +#endif + +#endif /* __LIB_HV_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c new file mode 100644 index 0000000000..6e3f178d6f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "fs_core.h" +#include "lib/ipsec_fs_roce.h" +#include "mlx5_core.h" + +struct mlx5_ipsec_miss { + struct mlx5_flow_group *group; + struct mlx5_flow_handle *rule; +}; + +struct mlx5_ipsec_rx_roce { + struct mlx5_flow_group *g; + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_ipsec_miss roce_miss; + + struct mlx5_flow_table *ft_rdma; + struct mlx5_flow_namespace *ns_rdma; +}; + +struct mlx5_ipsec_tx_roce { + struct mlx5_flow_group *g; + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_flow_namespace *ns; +}; + +struct mlx5_ipsec_fs { + struct mlx5_ipsec_rx_roce ipv4_rx; + struct mlx5_ipsec_rx_roce ipv6_rx; + struct mlx5_ipsec_tx_roce tx; +}; + +static void ipsec_fs_roce_setup_udp_dport(struct mlx5_flow_spec *spec, + u16 dport) +{ + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, dport); +} + +static int +ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev, + struct mlx5_flow_destination *default_dst, + struct mlx5_ipsec_rx_roce *roce) +{ + struct mlx5_flow_destination dst = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + ipsec_fs_roce_setup_udp_dport(spec, ROCE_V2_UDP_DPORT); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = roce->ft_rdma; + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add RX RoCE IPsec rule err=%d\n", + err); + goto fail_add_rule; + } + + roce->rule = rule; + + memset(spec, 0, sizeof(*spec)); + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, default_dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add RX RoCE IPsec miss rule err=%d\n", + err); + goto fail_add_default_rule; + } + + roce->roce_miss.rule = rule; + + kvfree(spec); + return 0; + +fail_add_default_rule: + mlx5_del_flow_rules(roce->rule); +fail_add_rule: + kvfree(spec); + return err; +} + +static int ipsec_fs_roce_tx_rule_setup(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_tx_roce *roce, + struct mlx5_flow_table *pol_ft) +{ + struct mlx5_flow_destination dst = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err = 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = pol_ft; + rule = mlx5_add_flow_rules(roce->ft, NULL, &flow_act, &dst, + 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add TX RoCE IPsec rule err=%d\n", + err); + goto out; + } + roce->rule = rule; + +out: + return err; +} + +void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce) +{ + struct mlx5_ipsec_tx_roce *tx_roce; + + if (!ipsec_roce) + return; + + tx_roce = &ipsec_roce->tx; + + mlx5_del_flow_rules(tx_roce->rule); + mlx5_destroy_flow_group(tx_roce->g); + mlx5_destroy_flow_table(tx_roce->ft); +} + +#define MLX5_TX_ROCE_GROUP_SIZE BIT(0) + +int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_table *pol_ft) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_ipsec_tx_roce *roce; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + int ix = 0; + int err; + u32 *in; + + if (!ipsec_roce) + return 0; + + roce = &ipsec_roce->tx; + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + ft_attr.max_fte = 1; + ft = mlx5_create_flow_table(roce->ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx ft err=%d\n", err); + goto free_in; + } + + roce->ft = ft; + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx group err=%d\n", err); + goto destroy_table; + } + roce->g = g; + + err = ipsec_fs_roce_tx_rule_setup(mdev, roce, pol_ft); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx rules err=%d\n", err); + goto destroy_group; + } + + kvfree(in); + return 0; + +destroy_group: + mlx5_destroy_flow_group(roce->g); +destroy_table: + mlx5_destroy_flow_table(ft); +free_in: + kvfree(in); + return err; +} + +struct mlx5_flow_table *mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family) +{ + struct mlx5_ipsec_rx_roce *rx_roce; + + if (!ipsec_roce) + return NULL; + + rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + return rx_roce->ft; +} + +void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, u32 family) +{ + struct mlx5_ipsec_rx_roce *rx_roce; + + if (!ipsec_roce) + return; + + rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + mlx5_del_flow_rules(rx_roce->roce_miss.rule); + mlx5_del_flow_rules(rx_roce->rule); + mlx5_destroy_flow_table(rx_roce->ft_rdma); + mlx5_destroy_flow_group(rx_roce->roce_miss.group); + mlx5_destroy_flow_group(rx_roce->g); + mlx5_destroy_flow_table(rx_roce->ft); +} + +#define MLX5_RX_ROCE_GROUP_SIZE BIT(0) + +int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_namespace *ns, + struct mlx5_flow_destination *default_dst, + u32 family, u32 level, u32 prio) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_ipsec_rx_roce *roce; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + if (!ipsec_roce) + return 0; + + roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + ft_attr.max_fte = 2; + ft_attr.level = level; + ft_attr.prio = prio; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at nic err=%d\n", err); + return err; + } + + roce->ft = ft; + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto fail_nomem; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx group at nic err=%d\n", err); + goto fail_group; + } + roce->g = g; + + memset(in, 0, MLX5_ST_SZ_BYTES(create_flow_group_in)); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx miss group at nic err=%d\n", err); + goto fail_mgroup; + } + roce->roce_miss.group = g; + + memset(&ft_attr, 0, sizeof(ft_attr)); + if (family == AF_INET) + ft_attr.level = 1; + ft = mlx5_create_flow_table(roce->ns_rdma, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at rdma err=%d\n", err); + goto fail_rdma_table; + } + + roce->ft_rdma = ft; + + err = ipsec_fs_roce_rx_rule_setup(mdev, default_dst, roce); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx rules err=%d\n", err); + goto fail_setup_rule; + } + + kvfree(in); + return 0; + +fail_setup_rule: + mlx5_destroy_flow_table(roce->ft_rdma); +fail_rdma_table: + mlx5_destroy_flow_group(roce->roce_miss.group); +fail_mgroup: + mlx5_destroy_flow_group(roce->g); +fail_group: + kvfree(in); +fail_nomem: + mlx5_destroy_flow_table(roce->ft); + return err; +} + +void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce) +{ + kfree(ipsec_roce); +} + +struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_ipsec_fs *roce_ipsec; + struct mlx5_flow_namespace *ns; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC); + if (!ns) { + mlx5_core_err(mdev, "Failed to get RoCE rx ns\n"); + return NULL; + } + + roce_ipsec = kzalloc(sizeof(*roce_ipsec), GFP_KERNEL); + if (!roce_ipsec) + return NULL; + + roce_ipsec->ipv4_rx.ns_rdma = ns; + roce_ipsec->ipv6_rx.ns_rdma = ns; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC); + if (!ns) { + mlx5_core_err(mdev, "Failed to get RoCE tx ns\n"); + goto err_tx; + } + + roce_ipsec->tx.ns = ns; + + return roce_ipsec; + +err_tx: + kfree(roce_ipsec); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h new file mode 100644 index 0000000000..9712d705fe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LIB_IPSEC_H__ +#define __MLX5_LIB_IPSEC_H__ + +struct mlx5_ipsec_fs; + +struct mlx5_flow_table * +mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family); +void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, + u32 family); +int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_namespace *ns, + struct mlx5_flow_destination *default_dst, + u32 family, u32 level, u32 prio); +void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce); +int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_table *pol_ft); +void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce); +struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev); + +#endif /* __MLX5_LIB_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c new file mode 100644 index 0000000000..4a078113e2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c @@ -0,0 +1,2411 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include +#include +#include +#include +#include "fs_core.h" +#include "lib/macsec_fs.h" +#include "mlx5_core.h" + +/* MACsec TX flow steering */ +#define CRYPTO_NUM_MAXSEC_FTE BIT(15) +#define CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE 1 + +#define TX_CRYPTO_TABLE_LEVEL 0 +#define TX_CRYPTO_TABLE_NUM_GROUPS 3 +#define TX_CRYPTO_TABLE_MKE_GROUP_SIZE 1 +#define TX_CRYPTO_TABLE_SA_GROUP_SIZE \ + (CRYPTO_NUM_MAXSEC_FTE - (TX_CRYPTO_TABLE_MKE_GROUP_SIZE + \ + CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE)) +#define TX_CHECK_TABLE_LEVEL 1 +#define TX_CHECK_TABLE_NUM_FTE 2 +#define RX_CRYPTO_TABLE_LEVEL 0 +#define RX_CHECK_TABLE_LEVEL 1 +#define RX_ROCE_TABLE_LEVEL 2 +#define RX_CHECK_TABLE_NUM_FTE 3 +#define RX_ROCE_TABLE_NUM_FTE 2 +#define RX_CRYPTO_TABLE_NUM_GROUPS 3 +#define RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE \ + ((CRYPTO_NUM_MAXSEC_FTE - CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE) / 2) +#define RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE \ + (CRYPTO_NUM_MAXSEC_FTE - RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE) +#define RX_NUM_OF_RULES_PER_SA 2 + +#define RDMA_RX_ROCE_IP_TABLE_LEVEL 0 +#define RDMA_RX_ROCE_MACSEC_OP_TABLE_LEVEL 1 + +#define MLX5_MACSEC_TAG_LEN 8 /* SecTAG length with ethertype and without the optional SCI */ +#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK 0x23 +#define MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET 0x8 +#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET 0x5 +#define MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT (0x1 << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) +#define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8 +#define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN) + +/* MACsec RX flow steering */ +#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E + +/* MACsec fs_id handling for steering */ +#define macsec_fs_set_tx_fs_id(fs_id) (MLX5_ETH_WQE_FT_META_MACSEC | (fs_id) << 2) +#define macsec_fs_set_rx_fs_id(fs_id) ((fs_id) | BIT(30)) + +struct mlx5_sectag_header { + __be16 ethertype; + u8 tci_an; + u8 sl; + u32 pn; + u8 sci[MACSEC_SCI_LEN]; /* optional */ +} __packed; + +struct mlx5_roce_macsec_tx_rule { + u32 fs_id; + u16 gid_idx; + struct list_head entry; + struct mlx5_flow_handle *rule; + struct mlx5_modify_hdr *meta_modhdr; +}; + +struct mlx5_macsec_tx_rule { + struct mlx5_flow_handle *rule; + struct mlx5_pkt_reformat *pkt_reformat; + u32 fs_id; +}; + +struct mlx5_macsec_flow_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; +}; + +struct mlx5_macsec_tables { + struct mlx5_macsec_flow_table ft_crypto; + struct mlx5_flow_handle *crypto_miss_rule; + + struct mlx5_flow_table *ft_check; + struct mlx5_flow_group *ft_check_group; + struct mlx5_fc *check_miss_rule_counter; + struct mlx5_flow_handle *check_miss_rule; + struct mlx5_fc *check_rule_counter; + + u32 refcnt; +}; + +struct mlx5_fs_id { + u32 id; + refcount_t refcnt; + sci_t sci; + struct rhash_head hash; +}; + +struct mlx5_macsec_device { + struct list_head macsec_devices_list_entry; + void *macdev; + struct xarray tx_id_xa; + struct xarray rx_id_xa; +}; + +struct mlx5_macsec_tx { + struct mlx5_flow_handle *crypto_mke_rule; + struct mlx5_flow_handle *check_rule; + + struct ida tx_halloc; + + struct mlx5_macsec_tables tables; + + struct mlx5_flow_table *ft_rdma_tx; +}; + +struct mlx5_roce_macsec_rx_rule { + u32 fs_id; + u16 gid_idx; + struct mlx5_flow_handle *op; + struct mlx5_flow_handle *ip; + struct list_head entry; +}; + +struct mlx5_macsec_rx_rule { + struct mlx5_flow_handle *rule[RX_NUM_OF_RULES_PER_SA]; + struct mlx5_modify_hdr *meta_modhdr; +}; + +struct mlx5_macsec_miss { + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; +}; + +struct mlx5_macsec_rx_roce { + /* Flow table/rules in NIC domain, to check if it's a RoCE packet */ + struct mlx5_flow_group *g; + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_modify_hdr *copy_modify_hdr; + struct mlx5_macsec_miss nic_miss; + + /* Flow table/rule in RDMA domain, to check dgid */ + struct mlx5_flow_table *ft_ip_check; + struct mlx5_flow_table *ft_macsec_op_check; + struct mlx5_macsec_miss miss; +}; + +struct mlx5_macsec_rx { + struct mlx5_flow_handle *check_rule[2]; + struct mlx5_pkt_reformat *check_rule_pkt_reformat[2]; + + struct mlx5_macsec_tables tables; + struct mlx5_macsec_rx_roce roce; +}; + +union mlx5_macsec_rule { + struct mlx5_macsec_tx_rule tx_rule; + struct mlx5_macsec_rx_rule rx_rule; +}; + +static const struct rhashtable_params rhash_sci = { + .key_len = sizeof_field(struct mlx5_fs_id, sci), + .key_offset = offsetof(struct mlx5_fs_id, sci), + .head_offset = offsetof(struct mlx5_fs_id, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +static const struct rhashtable_params rhash_fs_id = { + .key_len = sizeof_field(struct mlx5_fs_id, id), + .key_offset = offsetof(struct mlx5_fs_id, id), + .head_offset = offsetof(struct mlx5_fs_id, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +struct mlx5_macsec_fs { + struct mlx5_core_dev *mdev; + struct mlx5_macsec_tx *tx_fs; + struct mlx5_macsec_rx *rx_fs; + + /* Stats manage */ + struct mlx5_macsec_stats stats; + + /* Tx sci -> fs id mapping handling */ + struct rhashtable sci_hash; /* sci -> mlx5_fs_id */ + + /* RX fs_id -> mlx5_fs_id mapping handling */ + struct rhashtable fs_id_hash; /* fs_id -> mlx5_fs_id */ + + /* TX & RX fs_id lists per macsec device */ + struct list_head macsec_devices_list; +}; + +static void macsec_fs_destroy_groups(struct mlx5_macsec_flow_table *ft) +{ + int i; + + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} + +static void macsec_fs_destroy_flow_table(struct mlx5_macsec_flow_table *ft) +{ + macsec_fs_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void macsec_fs_tx_destroy(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_macsec_tables *tx_tables; + + if (mlx5_is_macsec_roce_supported(macsec_fs->mdev)) + mlx5_destroy_flow_table(tx_fs->ft_rdma_tx); + + tx_tables = &tx_fs->tables; + + /* Tx check table */ + if (tx_fs->check_rule) { + mlx5_del_flow_rules(tx_fs->check_rule); + tx_fs->check_rule = NULL; + } + + if (tx_tables->check_miss_rule) { + mlx5_del_flow_rules(tx_tables->check_miss_rule); + tx_tables->check_miss_rule = NULL; + } + + if (tx_tables->ft_check_group) { + mlx5_destroy_flow_group(tx_tables->ft_check_group); + tx_tables->ft_check_group = NULL; + } + + if (tx_tables->ft_check) { + mlx5_destroy_flow_table(tx_tables->ft_check); + tx_tables->ft_check = NULL; + } + + /* Tx crypto table */ + if (tx_fs->crypto_mke_rule) { + mlx5_del_flow_rules(tx_fs->crypto_mke_rule); + tx_fs->crypto_mke_rule = NULL; + } + + if (tx_tables->crypto_miss_rule) { + mlx5_del_flow_rules(tx_tables->crypto_miss_rule); + tx_tables->crypto_miss_rule = NULL; + } + + macsec_fs_destroy_flow_table(&tx_tables->ft_crypto); +} + +static int macsec_fs_tx_create_crypto_table_groups(struct mlx5_macsec_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int mclen = MLX5_ST_SZ_BYTES(fte_match_param); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(TX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + + if (!in) { + kfree(ft->g); + ft->g = NULL; + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + /* Flow Group for MKE match */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += TX_CRYPTO_TABLE_MKE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for SA rules */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2); + MLX5_SET(fte_match_param, mc, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_MACSEC_MASK); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += TX_CRYPTO_TABLE_SA_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_table + *macsec_fs_auto_group_table_create(struct mlx5_flow_namespace *ns, int flags, + int level, int max_fte) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb = NULL; + + /* reserve entry for the match all miss group and rule */ + ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.prio = 0; + ft_attr.flags = flags; + ft_attr.level = level; + ft_attr.max_fte = max_fte; + + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + + return fdb; +} + +enum { + RDMA_TX_MACSEC_LEVEL = 0, +}; + +static int macsec_fs_tx_roce_create(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int err; + + if (!mlx5_is_macsec_roce_supported(mdev)) { + mlx5_core_dbg(mdev, "Failed to init RoCE MACsec, capabilities not supported\n"); + return 0; + } + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC); + if (!ns) + return -ENOMEM; + + /* Tx RoCE crypto table */ + ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_TX_MACSEC_LEVEL, CRYPTO_NUM_MAXSEC_FTE); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Failed to create MACsec RoCE Tx crypto table err(%d)\n", err); + return err; + } + tx_fs->ft_rdma_tx = ft; + + return 0; +} + +static int macsec_fs_tx_create(struct mlx5_macsec_fs *macsec_fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_macsec_tables *tx_tables; + struct mlx5_flow_act flow_act = {}; + struct mlx5_macsec_flow_table *ft_crypto; + struct mlx5_flow_table *flow_table; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); + if (!ns) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto out_spec; + } + + tx_tables = &tx_fs->tables; + ft_crypto = &tx_tables->ft_crypto; + + /* Tx crypto table */ + ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + ft_attr.level = TX_CRYPTO_TABLE_LEVEL; + ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; + + flow_table = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + mlx5_core_err(mdev, "Failed to create MACsec Tx crypto table err(%d)\n", err); + goto out_flow_group; + } + ft_crypto->t = flow_table; + + /* Tx crypto table groups */ + err = macsec_fs_tx_create_crypto_table_groups(ft_crypto); + if (err) { + mlx5_core_err(mdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + + /* Tx crypto table MKE rule - MKE packets shouldn't be offloaded */ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_PAE); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec TX MKE rule, err=%d\n", err); + goto err; + } + tx_fs->crypto_mke_rule = rule; + + /* Tx crypto table Default miss rule */ + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec Tx table default miss rule %d\n", err); + goto err; + } + tx_tables->crypto_miss_rule = rule; + + /* Tx check table */ + flow_table = macsec_fs_auto_group_table_create(ns, 0, TX_CHECK_TABLE_LEVEL, + TX_CHECK_TABLE_NUM_FTE); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + mlx5_core_err(mdev, "Fail to create MACsec TX check table, err(%d)\n", err); + goto err; + } + tx_tables->ft_check = flow_table; + + /* Tx check table Default miss group/rule */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); + flow_group = mlx5_create_flow_group(tx_tables->ft_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + mlx5_core_err(mdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + tx_tables->ft_check_group = flow_group; + + /* Tx check table default drop rule */ + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to added MACsec tx check drop rule, err(%d)\n", err); + goto err; + } + tx_tables->check_miss_rule = rule; + + /* Tx check table rule */ + memset(spec, 0, sizeof(struct mlx5_flow_spec)); + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter); + rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec check rule, err=%d\n", err); + goto err; + } + tx_fs->check_rule = rule; + + err = macsec_fs_tx_roce_create(macsec_fs); + if (err) + goto err; + + kvfree(flow_group_in); + kvfree(spec); + return 0; + +err: + macsec_fs_tx_destroy(macsec_fs); +out_flow_group: + kvfree(flow_group_in); +out_spec: + kvfree(spec); + return err; +} + +static int macsec_fs_tx_ft_get(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_macsec_tables *tx_tables; + int err = 0; + + tx_tables = &tx_fs->tables; + if (tx_tables->refcnt) + goto out; + + err = macsec_fs_tx_create(macsec_fs); + if (err) + return err; + +out: + tx_tables->refcnt++; + return err; +} + +static void macsec_fs_tx_ft_put(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; + + if (--tx_tables->refcnt) + return; + + macsec_fs_tx_destroy(macsec_fs); +} + +static int macsec_fs_tx_setup_fte(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + u32 macsec_obj_id, + u32 *fs_id) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + int err = 0; + u32 id; + + err = ida_alloc_range(&tx_fs->tx_halloc, 1, + MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES, + GFP_KERNEL); + if (err < 0) + return err; + + id = err; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + + /* Metadata match */ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a, + MLX5_ETH_WQE_FT_META_MACSEC_MASK); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a, + macsec_fs_set_tx_fs_id(id)); + + *fs_id = id; + flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; + flow_act->crypto.obj_id = macsec_obj_id; + + mlx5_core_dbg(macsec_fs->mdev, "Tx fte: macsec obj_id %u, fs_id %u\n", macsec_obj_id, id); + return 0; +} + +static void macsec_fs_tx_create_sectag_header(const struct macsec_context *ctx, + char *reformatbf, + size_t *reformat_size) +{ + const struct macsec_secy *secy = ctx->secy; + bool sci_present = macsec_send_sci(secy); + struct mlx5_sectag_header sectag = {}; + const struct macsec_tx_sc *tx_sc; + + tx_sc = &secy->tx_sc; + sectag.ethertype = htons(ETH_P_MACSEC); + + if (sci_present) { + sectag.tci_an |= MACSEC_TCI_SC; + memcpy(§ag.sci, &secy->sci, + sizeof(sectag.sci)); + } else { + if (tx_sc->end_station) + sectag.tci_an |= MACSEC_TCI_ES; + if (tx_sc->scb) + sectag.tci_an |= MACSEC_TCI_SCB; + } + + /* With GCM, C/E clear for !encrypt, both set for encrypt */ + if (tx_sc->encrypt) + sectag.tci_an |= MACSEC_TCI_CONFID; + else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) + sectag.tci_an |= MACSEC_TCI_C; + + sectag.tci_an |= tx_sc->encoding_sa; + + *reformat_size = MLX5_MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0); + + memcpy(reformatbf, §ag, *reformat_size); +} + +static bool macsec_fs_is_macsec_device_empty(struct mlx5_macsec_device *macsec_device) +{ + if (xa_empty(&macsec_device->tx_id_xa) && + xa_empty(&macsec_device->rx_id_xa)) + return true; + + return false; +} + +static void macsec_fs_id_del(struct list_head *macsec_devices_list, u32 fs_id, + void *macdev, struct rhashtable *hash_table, bool is_tx) +{ + const struct rhashtable_params *rhash = (is_tx) ? &rhash_sci : &rhash_fs_id; + struct mlx5_macsec_device *iter, *macsec_device = NULL; + struct mlx5_fs_id *fs_id_found; + struct xarray *fs_id_xa; + + list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) { + if (iter->macdev == macdev) { + macsec_device = iter; + break; + } + } + WARN_ON(!macsec_device); + + fs_id_xa = (is_tx) ? &macsec_device->tx_id_xa : + &macsec_device->rx_id_xa; + xa_lock(fs_id_xa); + fs_id_found = xa_load(fs_id_xa, fs_id); + WARN_ON(!fs_id_found); + + if (!refcount_dec_and_test(&fs_id_found->refcnt)) { + xa_unlock(fs_id_xa); + return; + } + + if (fs_id_found->id) { + /* Make sure ongoing datapath readers sees a valid SA */ + rhashtable_remove_fast(hash_table, &fs_id_found->hash, *rhash); + fs_id_found->id = 0; + } + xa_unlock(fs_id_xa); + + xa_erase(fs_id_xa, fs_id); + + kfree(fs_id_found); + + if (macsec_fs_is_macsec_device_empty(macsec_device)) { + list_del(&macsec_device->macsec_devices_list_entry); + kfree(macsec_device); + } +} + +static int macsec_fs_id_add(struct list_head *macsec_devices_list, u32 fs_id, + void *macdev, struct rhashtable *hash_table, sci_t sci, + bool is_tx) +{ + const struct rhashtable_params *rhash = (is_tx) ? &rhash_sci : &rhash_fs_id; + struct mlx5_macsec_device *iter, *macsec_device = NULL; + struct mlx5_fs_id *fs_id_iter; + struct xarray *fs_id_xa; + int err; + + if (!is_tx) { + rcu_read_lock(); + fs_id_iter = rhashtable_lookup(hash_table, &fs_id, rhash_fs_id); + if (fs_id_iter) { + refcount_inc(&fs_id_iter->refcnt); + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + } + + fs_id_iter = kzalloc(sizeof(*fs_id_iter), GFP_KERNEL); + if (!fs_id_iter) + return -ENOMEM; + + list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) { + if (iter->macdev == macdev) { + macsec_device = iter; + break; + } + } + + if (!macsec_device) { /* first time adding a SA to that device */ + macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL); + if (!macsec_device) { + err = -ENOMEM; + goto err_alloc_dev; + } + macsec_device->macdev = macdev; + xa_init(&macsec_device->tx_id_xa); + xa_init(&macsec_device->rx_id_xa); + list_add(&macsec_device->macsec_devices_list_entry, macsec_devices_list); + } + + fs_id_xa = (is_tx) ? &macsec_device->tx_id_xa : + &macsec_device->rx_id_xa; + fs_id_iter->id = fs_id; + refcount_set(&fs_id_iter->refcnt, 1); + fs_id_iter->sci = sci; + err = xa_err(xa_store(fs_id_xa, fs_id, fs_id_iter, GFP_KERNEL)); + if (err) + goto err_store_id; + + err = rhashtable_insert_fast(hash_table, &fs_id_iter->hash, *rhash); + if (err) + goto err_hash_insert; + + return 0; + +err_hash_insert: + xa_erase(fs_id_xa, fs_id); +err_store_id: + if (macsec_fs_is_macsec_device_empty(macsec_device)) { + list_del(&macsec_device->macsec_devices_list_entry); + kfree(macsec_device); + } +err_alloc_dev: + kfree(fs_id_iter); + return err; +} + +static void macsec_fs_tx_del_rule(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_macsec_tx_rule *tx_rule, + void *macdev) +{ + macsec_fs_id_del(&macsec_fs->macsec_devices_list, tx_rule->fs_id, macdev, + &macsec_fs->sci_hash, true); + + if (tx_rule->rule) { + mlx5_del_flow_rules(tx_rule->rule); + tx_rule->rule = NULL; + } + + if (tx_rule->pkt_reformat) { + mlx5_packet_reformat_dealloc(macsec_fs->mdev, tx_rule->pkt_reformat); + tx_rule->pkt_reformat = NULL; + } + + if (tx_rule->fs_id) { + ida_free(&macsec_fs->tx_fs->tx_halloc, tx_rule->fs_id); + tx_rule->fs_id = 0; + } + + kfree(tx_rule); + + macsec_fs_tx_ft_put(macsec_fs); +} + +#define MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES 1 + +static union mlx5_macsec_rule * +macsec_fs_tx_add_rule(struct mlx5_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, u32 *fs_id) +{ + char reformatbf[MLX5_MACSEC_TAG_LEN + MACSEC_SCI_LEN]; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + union mlx5_macsec_rule *macsec_rule = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5_macsec_tables *tx_tables; + struct mlx5_macsec_tx_rule *tx_rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + size_t reformat_size; + int err = 0; + + tx_tables = &tx_fs->tables; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + err = macsec_fs_tx_ft_get(macsec_fs); + if (err) + goto out_spec; + + macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); + if (!macsec_rule) { + macsec_fs_tx_ft_put(macsec_fs); + goto out_spec; + } + + tx_rule = &macsec_rule->tx_rule; + + /* Tx crypto table crypto rule */ + macsec_fs_tx_create_sectag_header(macsec_ctx, reformatbf, &reformat_size); + + reformat_params.type = MLX5_REFORMAT_TYPE_ADD_MACSEC; + reformat_params.size = reformat_size; + reformat_params.data = reformatbf; + + if (is_vlan_dev(macsec_ctx->netdev)) + reformat_params.param_0 = MLX5_REFORMAT_PARAM_ADD_MACSEC_OFFSET_4_BYTES; + + flow_act.pkt_reformat = mlx5_packet_reformat_alloc(mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); + if (IS_ERR(flow_act.pkt_reformat)) { + err = PTR_ERR(flow_act.pkt_reformat); + mlx5_core_err(mdev, "Failed to allocate MACsec Tx reformat context err=%d\n", err); + goto err; + } + tx_rule->pkt_reformat = flow_act.pkt_reformat; + + err = macsec_fs_tx_setup_fte(macsec_fs, spec, &flow_act, attrs->macsec_obj_id, fs_id); + if (err) { + mlx5_core_err(mdev, + "Failed to add packet reformat for MACsec TX crypto rule, err=%d\n", + err); + goto err; + } + + tx_rule->fs_id = *fs_id; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = tx_tables->ft_check; + rule = mlx5_add_flow_rules(tx_tables->ft_crypto.t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec TX crypto rule, err=%d\n", err); + goto err; + } + tx_rule->rule = rule; + + err = macsec_fs_id_add(&macsec_fs->macsec_devices_list, *fs_id, macsec_ctx->secy->netdev, + &macsec_fs->sci_hash, attrs->sci, true); + if (err) { + mlx5_core_err(mdev, "Failed to save fs_id, err=%d\n", err); + goto err; + } + + goto out_spec; + +err: + macsec_fs_tx_del_rule(macsec_fs, tx_rule, macsec_ctx->secy->netdev); + macsec_rule = NULL; +out_spec: + kvfree(spec); + + return macsec_rule; +} + +static void macsec_fs_tx_cleanup(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_tables *tx_tables; + + if (!tx_fs) + return; + + tx_tables = &tx_fs->tables; + if (tx_tables->refcnt) { + mlx5_core_err(mdev, + "Can't destroy MACsec offload tx_fs, refcnt(%u) isn't 0\n", + tx_tables->refcnt); + return; + } + + ida_destroy(&tx_fs->tx_halloc); + + if (tx_tables->check_miss_rule_counter) { + mlx5_fc_destroy(mdev, tx_tables->check_miss_rule_counter); + tx_tables->check_miss_rule_counter = NULL; + } + + if (tx_tables->check_rule_counter) { + mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); + tx_tables->check_rule_counter = NULL; + } + + kfree(tx_fs); + macsec_fs->tx_fs = NULL; +} + +static int macsec_fs_tx_init(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_tables *tx_tables; + struct mlx5_macsec_tx *tx_fs; + struct mlx5_fc *flow_counter; + int err; + + tx_fs = kzalloc(sizeof(*tx_fs), GFP_KERNEL); + if (!tx_fs) + return -ENOMEM; + + tx_tables = &tx_fs->tables; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to create MACsec Tx encrypt flow counter, err(%d)\n", + err); + goto err_encrypt_counter; + } + tx_tables->check_rule_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to create MACsec Tx drop flow counter, err(%d)\n", + err); + goto err_drop_counter; + } + tx_tables->check_miss_rule_counter = flow_counter; + + ida_init(&tx_fs->tx_halloc); + INIT_LIST_HEAD(&macsec_fs->macsec_devices_list); + + macsec_fs->tx_fs = tx_fs; + + return 0; + +err_drop_counter: + mlx5_fc_destroy(mdev, tx_tables->check_rule_counter); + tx_tables->check_rule_counter = NULL; + +err_encrypt_counter: + kfree(tx_fs); + macsec_fs->tx_fs = NULL; + + return err; +} + +static void macsec_fs_rx_roce_miss_destroy(struct mlx5_macsec_miss *miss) +{ + mlx5_del_flow_rules(miss->rule); + mlx5_destroy_flow_group(miss->g); +} + +static void macsec_fs_rdma_rx_destroy(struct mlx5_macsec_rx_roce *roce, struct mlx5_core_dev *mdev) +{ + if (!mlx5_is_macsec_roce_supported(mdev)) + return; + + mlx5_del_flow_rules(roce->nic_miss.rule); + mlx5_del_flow_rules(roce->rule); + mlx5_modify_header_dealloc(mdev, roce->copy_modify_hdr); + mlx5_destroy_flow_group(roce->nic_miss.g); + mlx5_destroy_flow_group(roce->g); + mlx5_destroy_flow_table(roce->ft); + + macsec_fs_rx_roce_miss_destroy(&roce->miss); + mlx5_destroy_flow_table(roce->ft_macsec_op_check); + mlx5_destroy_flow_table(roce->ft_ip_check); +} + +static void macsec_fs_rx_destroy(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_macsec_tables *rx_tables; + int i; + + /* Rx check table */ + for (i = 1; i >= 0; --i) { + if (rx_fs->check_rule[i]) { + mlx5_del_flow_rules(rx_fs->check_rule[i]); + rx_fs->check_rule[i] = NULL; + } + + if (rx_fs->check_rule_pkt_reformat[i]) { + mlx5_packet_reformat_dealloc(macsec_fs->mdev, + rx_fs->check_rule_pkt_reformat[i]); + rx_fs->check_rule_pkt_reformat[i] = NULL; + } + } + + rx_tables = &rx_fs->tables; + + if (rx_tables->check_miss_rule) { + mlx5_del_flow_rules(rx_tables->check_miss_rule); + rx_tables->check_miss_rule = NULL; + } + + if (rx_tables->ft_check_group) { + mlx5_destroy_flow_group(rx_tables->ft_check_group); + rx_tables->ft_check_group = NULL; + } + + if (rx_tables->ft_check) { + mlx5_destroy_flow_table(rx_tables->ft_check); + rx_tables->ft_check = NULL; + } + + /* Rx crypto table */ + if (rx_tables->crypto_miss_rule) { + mlx5_del_flow_rules(rx_tables->crypto_miss_rule); + rx_tables->crypto_miss_rule = NULL; + } + + macsec_fs_destroy_flow_table(&rx_tables->ft_crypto); + + macsec_fs_rdma_rx_destroy(&macsec_fs->rx_fs->roce, macsec_fs->mdev); +} + +static int macsec_fs_rx_create_crypto_table_groups(struct mlx5_macsec_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int mclen = MLX5_ST_SZ_BYTES(fte_match_param); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(RX_CRYPTO_TABLE_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + /* Flow group for SA rule with SCI */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS_5); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_2); + MLX5_SET_TO_ONES(fte_match_param, mc, misc_parameters_5.macsec_tag_3); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += RX_CRYPTO_TABLE_SA_RULE_WITH_SCI_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow group for SA rule without SCI */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS_5); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.smac_15_0); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + MLX5_SET(fte_match_param, mc, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += RX_CRYPTO_TABLE_SA_RULE_WITHOUT_SCI_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + memset(mc, 0, mclen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += CRYPTO_TABLE_DEFAULT_RULE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int macsec_fs_rx_create_check_decap_rule(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec, + int reformat_param_size) +{ + int rule_index = (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) ? 0 : 1; + u8 mlx5_reformat_buf[MLX5_SECTAG_HEADER_SIZE_WITH_SCI]; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_destination roce_dest[2]; + struct mlx5_macsec_tables *rx_tables; + struct mlx5_flow_handle *rule; + int err = 0, dstn = 0; + + rx_tables = &rx_fs->tables; + + /* Rx check table decap 16B rule */ + memset(dest, 0, sizeof(*dest)); + memset(flow_act, 0, sizeof(*flow_act)); + memset(spec, 0, sizeof(*spec)); + + reformat_params.type = MLX5_REFORMAT_TYPE_DEL_MACSEC; + reformat_params.size = reformat_param_size; + reformat_params.data = mlx5_reformat_buf; + flow_act->pkt_reformat = mlx5_packet_reformat_alloc(mdev, + &reformat_params, + MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (IS_ERR(flow_act->pkt_reformat)) { + err = PTR_ERR(flow_act->pkt_reformat); + mlx5_core_err(mdev, "Failed to allocate MACsec Rx reformat context err=%d\n", err); + return err; + } + rx_fs->check_rule_pkt_reformat[rule_index] = flow_act->pkt_reformat; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + /* MACsec syndrome match */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.macsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.macsec_syndrome, 0); + /* ASO return reg syndrome match */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + /* Sectag TCI SC present bit*/ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + if (reformat_param_size == MLX5_SECTAG_HEADER_SIZE_WITH_SCI) + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_SC_FIELD_BIT << + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + flow_act->flags = FLOW_ACT_NO_APPEND; + + if (rx_fs->roce.ft) { + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + roce_dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + roce_dest[dstn].ft = rx_fs->roce.ft; + dstn++; + } else { + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + } + + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + roce_dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + roce_dest[dstn].counter_id = mlx5_fc_id(rx_tables->check_rule_counter); + rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, roce_dest, dstn + 1); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add MACsec Rx check rule, err=%d\n", err); + return err; + } + + rx_fs->check_rule[rule_index] = rule; + + return 0; +} + +static int macsec_fs_rx_roce_miss_create(struct mlx5_core_dev *mdev, + struct mlx5_macsec_rx_roce *roce) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_handle *rule; + u32 *flow_group_in; + int err; + + flow_group_in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + /* IP check ft has no miss rule since we use default miss action which is go to next PRIO */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + roce->ft_macsec_op_check->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + roce->ft_macsec_op_check->max_fte - 1); + flow_group = mlx5_create_flow_group(roce->ft_macsec_op_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + mlx5_core_err(mdev, + "Failed to create miss flow group for MACsec RoCE operation check table err(%d)\n", + err); + goto err_macsec_op_miss_group; + } + roce->miss.g = flow_group; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + rule = mlx5_add_flow_rules(roce->ft_macsec_op_check, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add miss rule to MACsec RoCE operation check table err(%d)\n", + err); + goto err_macsec_op_rule; + } + roce->miss.rule = rule; + + kvfree(flow_group_in); + return 0; + +err_macsec_op_rule: + mlx5_destroy_flow_group(roce->miss.g); +err_macsec_op_miss_group: + kvfree(flow_group_in); + return err; +} + +#define MLX5_RX_ROCE_GROUP_SIZE BIT(0) + +static int macsec_fs_rx_roce_jump_to_rdma_groups_create(struct mlx5_core_dev *mdev, + struct mlx5_macsec_rx_roce *roce) +{ + struct mlx5_flow_group *g; + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(roce->ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Failed to create main flow group for MACsec RoCE NIC UDP table err(%d)\n", + err); + goto err_udp_group; + } + roce->g = g; + + memset(in, 0, MLX5_ST_SZ_BYTES(create_flow_group_in)); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(roce->ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Failed to create miss flow group for MACsec RoCE NIC UDP table err(%d)\n", + err); + goto err_udp_miss_group; + } + roce->nic_miss.g = g; + + kvfree(in); + return 0; + +err_udp_miss_group: + mlx5_destroy_flow_group(roce->g); +err_udp_group: + kvfree(in); + return err; +} + +static int macsec_fs_rx_roce_jump_to_rdma_rules_create(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_macsec_rx_roce *roce) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_destination dst = {}; + struct mlx5_modify_hdr *modify_hdr; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, ROCE_V2_UDP_DPORT); + + MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(copy_action_in, action, src_offset, 0); + MLX5_SET(copy_action_in, action, length, 32); + MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_5); + MLX5_SET(copy_action_in, action, dst_offset, 0); + + modify_hdr = mlx5_modify_header_alloc(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC, + 1, action); + + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + mlx5_core_err(mdev, + "Failed to alloc macsec copy modify_header_id err(%d)\n", err); + goto err_alloc_hdr; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.modify_hdr = modify_hdr; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = roce->ft_ip_check; + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add rule to MACsec RoCE NIC UDP table err(%d)\n", + err); + goto err_add_rule; + } + roce->rule = rule; + roce->copy_modify_hdr = modify_hdr; + + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + rule = mlx5_add_flow_rules(roce->ft, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to add miss rule to MACsec RoCE NIC UDP table err(%d)\n", + err); + goto err_add_rule2; + } + roce->nic_miss.rule = rule; + + kvfree(spec); + return 0; + +err_add_rule2: + mlx5_del_flow_rules(roce->rule); +err_add_rule: + mlx5_modify_header_dealloc(macsec_fs->mdev, modify_hdr); +err_alloc_hdr: + kvfree(spec); + return err; +} + +static int macsec_fs_rx_roce_jump_to_rdma_create(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_macsec_rx_roce *roce) +{ + int err; + + err = macsec_fs_rx_roce_jump_to_rdma_groups_create(macsec_fs->mdev, roce); + if (err) + return err; + + err = macsec_fs_rx_roce_jump_to_rdma_rules_create(macsec_fs, roce); + if (err) + goto err; + + return 0; +err: + mlx5_destroy_flow_group(roce->nic_miss.g); + mlx5_destroy_flow_group(roce->g); + return err; +} + +static int macsec_fs_rx_roce_create(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int err = 0; + + if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev)) { + mlx5_core_dbg(mdev, "Failed to init RoCE MACsec, capabilities not supported\n"); + return 0; + } + + ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC); + if (!ns) + return -ENOMEM; + + ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_RX_ROCE_IP_TABLE_LEVEL, + CRYPTO_NUM_MAXSEC_FTE); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, + "Failed to create MACsec IP check RoCE table err(%d)\n", err); + return err; + } + rx_fs->roce.ft_ip_check = ft; + + ft = macsec_fs_auto_group_table_create(ns, 0, RDMA_RX_ROCE_MACSEC_OP_TABLE_LEVEL, + CRYPTO_NUM_MAXSEC_FTE); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, + "Failed to create MACsec operation check RoCE table err(%d)\n", + err); + goto err_macsec_op; + } + rx_fs->roce.ft_macsec_op_check = ft; + + err = macsec_fs_rx_roce_miss_create(mdev, &rx_fs->roce); + if (err) + goto err_miss_create; + + ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (!ns) { + err = -EOPNOTSUPP; + goto err_ns; + } + + ft_attr.level = RX_ROCE_TABLE_LEVEL; + ft_attr.max_fte = RX_ROCE_TABLE_NUM_FTE; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, + "Failed to create MACsec jump to RX RoCE, NIC table err(%d)\n", err); + goto err_ns; + } + rx_fs->roce.ft = ft; + + err = macsec_fs_rx_roce_jump_to_rdma_create(macsec_fs, &rx_fs->roce); + if (err) + goto err_udp_ft; + + return 0; + +err_udp_ft: + mlx5_destroy_flow_table(rx_fs->roce.ft); +err_ns: + macsec_fs_rx_roce_miss_destroy(&rx_fs->roce.miss); +err_miss_create: + mlx5_destroy_flow_table(rx_fs->roce.ft_macsec_op_check); +err_macsec_op: + mlx5_destroy_flow_table(rx_fs->roce.ft_ip_check); + return err; +} + +static int macsec_fs_rx_create(struct mlx5_macsec_fs *macsec_fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_flow_table *ft_crypto; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_macsec_tables *rx_tables; + struct mlx5_flow_table *flow_table; + struct mlx5_flow_group *flow_group; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 *flow_group_in; + int err; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); + if (!ns) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) { + err = -ENOMEM; + goto free_spec; + } + + rx_tables = &rx_fs->tables; + ft_crypto = &rx_tables->ft_crypto; + + err = macsec_fs_rx_roce_create(macsec_fs); + if (err) + goto out_flow_group; + + /* Rx crypto table */ + ft_attr.level = RX_CRYPTO_TABLE_LEVEL; + ft_attr.max_fte = CRYPTO_NUM_MAXSEC_FTE; + + flow_table = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + mlx5_core_err(mdev, "Failed to create MACsec Rx crypto table err(%d)\n", err); + goto err; + } + ft_crypto->t = flow_table; + + /* Rx crypto table groups */ + err = macsec_fs_rx_create_crypto_table_groups(ft_crypto); + if (err) { + mlx5_core_err(mdev, + "Failed to create default flow group for MACsec Tx crypto table err(%d)\n", + err); + goto err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + rule = mlx5_add_flow_rules(ft_crypto->t, NULL, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add MACsec Rx crypto table default miss rule %d\n", + err); + goto err; + } + rx_tables->crypto_miss_rule = rule; + + /* Rx check table */ + flow_table = macsec_fs_auto_group_table_create(ns, + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT, + RX_CHECK_TABLE_LEVEL, + RX_CHECK_TABLE_NUM_FTE); + if (IS_ERR(flow_table)) { + err = PTR_ERR(flow_table); + mlx5_core_err(mdev, "Fail to create MACsec RX check table, err(%d)\n", err); + goto err; + } + rx_tables->ft_check = flow_table; + + /* Rx check table Default miss group/rule */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_table->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_table->max_fte - 1); + flow_group = mlx5_create_flow_group(rx_tables->ft_check, flow_group_in); + if (IS_ERR(flow_group)) { + err = PTR_ERR(flow_group); + mlx5_core_err(mdev, + "Failed to create default flow group for MACsec Rx check table err(%d)\n", + err); + goto err; + } + rx_tables->ft_check_group = flow_group; + + /* Rx check table default drop rule */ + memset(&flow_act, 0, sizeof(flow_act)); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to added MACsec Rx check drop rule, err(%d)\n", err); + goto err; + } + rx_tables->check_miss_rule = rule; + + /* Rx check table decap rules */ + err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, + MLX5_SECTAG_HEADER_SIZE_WITH_SCI); + if (err) + goto err; + + err = macsec_fs_rx_create_check_decap_rule(macsec_fs, &dest, &flow_act, spec, + MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI); + if (err) + goto err; + + goto out_flow_group; + +err: + macsec_fs_rx_destroy(macsec_fs); +out_flow_group: + kvfree(flow_group_in); +free_spec: + kvfree(spec); + return err; +} + +static int macsec_fs_rx_ft_get(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + int err = 0; + + if (rx_tables->refcnt) + goto out; + + err = macsec_fs_rx_create(macsec_fs); + if (err) + return err; + +out: + rx_tables->refcnt++; + return err; +} + +static void macsec_fs_rx_ft_put(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + + if (--rx_tables->refcnt) + return; + + macsec_fs_rx_destroy(macsec_fs); +} + +static void macsec_fs_rx_del_rule(struct mlx5_macsec_fs *macsec_fs, + struct mlx5_macsec_rx_rule *rx_rule, + void *macdev, u32 fs_id) +{ + int i; + + macsec_fs_id_del(&macsec_fs->macsec_devices_list, fs_id, macdev, + &macsec_fs->fs_id_hash, false); + + for (i = 0; i < RX_NUM_OF_RULES_PER_SA; ++i) { + if (rx_rule->rule[i]) { + mlx5_del_flow_rules(rx_rule->rule[i]); + rx_rule->rule[i] = NULL; + } + } + + if (rx_rule->meta_modhdr) { + mlx5_modify_header_dealloc(macsec_fs->mdev, rx_rule->meta_modhdr); + rx_rule->meta_modhdr = NULL; + } + + kfree(rx_rule); + + macsec_fs_rx_ft_put(macsec_fs); +} + +static void macsec_fs_rx_setup_fte(struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_macsec_rule_attrs *attrs, + bool sci_present) +{ + u8 tci_an = (sci_present << MLX5_MACSEC_SECTAG_TCI_SC_FIELD_OFFSET) | attrs->assoc_num; + struct mlx5_flow_act_crypto_params *crypto_params = &flow_act->crypto; + __be32 *sci_p = (__be32 *)(&attrs->sci); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* MACsec ethertype */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ETH_P_MACSEC); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + /* Sectag AN + TCI SC present bit*/ + MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_5.macsec_tag_0, + MLX5_MACSEC_SECTAG_TCI_AN_FIELD_BITMASK << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_0, + tci_an << MLX5_MACSEC_SECTAG_TCI_AN_FIELD_OFFSET); + + if (sci_present) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_5.macsec_tag_2); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_2, + be32_to_cpu(sci_p[0])); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_5.macsec_tag_3); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_5.macsec_tag_3, + be32_to_cpu(sci_p[1])); + } else { + /* When SCI isn't present in the Sectag, need to match the source */ + /* MAC address only if the SCI contains the default MACsec PORT */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16), + sci_p, ETH_ALEN); + } + + crypto_params->type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; + crypto_params->obj_id = attrs->macsec_obj_id; +} + +static union mlx5_macsec_rule * +macsec_fs_rx_add_rule(struct mlx5_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 fs_id) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + union mlx5_macsec_rule *macsec_rule = NULL; + struct mlx5_modify_hdr *modify_hdr = NULL; + struct mlx5_macsec_flow_table *ft_crypto; + struct mlx5_flow_destination dest = {}; + struct mlx5_macsec_tables *rx_tables; + struct mlx5_macsec_rx_rule *rx_rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return NULL; + + err = macsec_fs_rx_ft_get(macsec_fs); + if (err) + goto out_spec; + + macsec_rule = kzalloc(sizeof(*macsec_rule), GFP_KERNEL); + if (!macsec_rule) { + macsec_fs_rx_ft_put(macsec_fs); + goto out_spec; + } + + rx_rule = &macsec_rule->rx_rule; + rx_tables = &rx_fs->tables; + ft_crypto = &rx_tables->ft_crypto; + + /* Set bit[31 - 30] macsec marker - 0x01 */ + /* Set bit[15-0] fs id */ + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(set_action_in, action, data, macsec_fs_set_rx_fs_id(fs_id)); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + mlx5_core_err(mdev, "Fail to alloc MACsec set modify_header_id err=%d\n", err); + modify_hdr = NULL; + goto err; + } + rx_rule->meta_modhdr = modify_hdr; + + /* Rx crypto table with SCI rule */ + macsec_fs_rx_setup_fte(spec, &flow_act, attrs, true); + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx_tables->ft_check; + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add SA with SCI rule to Rx crypto rule, err=%d\n", + err); + goto err; + } + rx_rule->rule[0] = rule; + + /* Rx crypto table without SCI rule */ + if ((cpu_to_be64((__force u64)attrs->sci) & 0xFFFF) == ntohs(MACSEC_PORT_ES)) { + memset(spec, 0, sizeof(struct mlx5_flow_spec)); + memset(&dest, 0, sizeof(struct mlx5_flow_destination)); + memset(&flow_act, 0, sizeof(flow_act)); + + macsec_fs_rx_setup_fte(spec, &flow_act, attrs, false); + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = rx_tables->ft_check; + rule = mlx5_add_flow_rules(ft_crypto->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add SA without SCI rule to Rx crypto rule, err=%d\n", + err); + goto err; + } + rx_rule->rule[1] = rule; + } + + err = macsec_fs_id_add(&macsec_fs->macsec_devices_list, fs_id, macsec_ctx->secy->netdev, + &macsec_fs->fs_id_hash, attrs->sci, false); + if (err) { + mlx5_core_err(mdev, "Failed to save fs_id, err=%d\n", err); + goto err; + } + + kvfree(spec); + return macsec_rule; + +err: + macsec_fs_rx_del_rule(macsec_fs, rx_rule, macsec_ctx->secy->netdev, fs_id); + macsec_rule = NULL; +out_spec: + kvfree(spec); + return macsec_rule; +} + +static int macsec_fs_rx_init(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_tables *rx_tables; + struct mlx5_macsec_rx *rx_fs; + struct mlx5_fc *flow_counter; + int err; + + rx_fs = kzalloc(sizeof(*rx_fs), GFP_KERNEL); + if (!rx_fs) + return -ENOMEM; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to create MACsec Rx encrypt flow counter, err(%d)\n", + err); + goto err_encrypt_counter; + } + + rx_tables = &rx_fs->tables; + rx_tables->check_rule_counter = flow_counter; + + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to create MACsec Rx drop flow counter, err(%d)\n", + err); + goto err_drop_counter; + } + rx_tables->check_miss_rule_counter = flow_counter; + + macsec_fs->rx_fs = rx_fs; + + return 0; + +err_drop_counter: + mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); + rx_tables->check_rule_counter = NULL; + +err_encrypt_counter: + kfree(rx_fs); + macsec_fs->rx_fs = NULL; + + return err; +} + +static void macsec_fs_rx_cleanup(struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_macsec_tables *rx_tables; + + if (!rx_fs) + return; + + rx_tables = &rx_fs->tables; + + if (rx_tables->refcnt) { + mlx5_core_err(mdev, + "Can't destroy MACsec offload rx_fs, refcnt(%u) isn't 0\n", + rx_tables->refcnt); + return; + } + + if (rx_tables->check_miss_rule_counter) { + mlx5_fc_destroy(mdev, rx_tables->check_miss_rule_counter); + rx_tables->check_miss_rule_counter = NULL; + } + + if (rx_tables->check_rule_counter) { + mlx5_fc_destroy(mdev, rx_tables->check_rule_counter); + rx_tables->check_rule_counter = NULL; + } + + kfree(rx_fs); + macsec_fs->rx_fs = NULL; +} + +static void set_ipaddr_spec_v4(struct sockaddr_in *in, struct mlx5_flow_spec *spec, bool is_dst_ip) +{ + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.ip_version, MLX5_FS_IPV4_VERSION); + + if (is_dst_ip) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &in->sin_addr.s_addr, 4); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &in->sin_addr.s_addr, 4); + } +} + +static void set_ipaddr_spec_v6(struct sockaddr_in6 *in6, struct mlx5_flow_spec *spec, + bool is_dst_ip) +{ + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.ip_version, MLX5_FS_IPV6_VERSION); + + if (is_dst_ip) { + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &in6->sin6_addr, 16); + } else { + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &in6->sin6_addr, 16); + } +} + +static void set_ipaddr_spec(const struct sockaddr *addr, + struct mlx5_flow_spec *spec, bool is_dst_ip) +{ + struct sockaddr_in6 *in6; + + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_version); + + if (addr->sa_family == AF_INET) { + struct sockaddr_in *in = (struct sockaddr_in *)addr; + + set_ipaddr_spec_v4(in, spec, is_dst_ip); + return; + } + + in6 = (struct sockaddr_in6 *)addr; + set_ipaddr_spec_v6(in6, spec, is_dst_ip); +} + +static void macsec_fs_del_roce_rule_rx(struct mlx5_roce_macsec_rx_rule *rx_rule) +{ + mlx5_del_flow_rules(rx_rule->op); + mlx5_del_flow_rules(rx_rule->ip); + list_del(&rx_rule->entry); + kfree(rx_rule); +} + +static void macsec_fs_del_roce_rules_rx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, + struct list_head *rx_rules_list) +{ + struct mlx5_roce_macsec_rx_rule *rx_rule, *next; + + if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev)) + return; + + list_for_each_entry_safe(rx_rule, next, rx_rules_list, entry) { + if (rx_rule->fs_id == fs_id) + macsec_fs_del_roce_rule_rx(rx_rule); + } +} + +static void macsec_fs_del_roce_rule_tx(struct mlx5_core_dev *mdev, + struct mlx5_roce_macsec_tx_rule *tx_rule) +{ + mlx5_del_flow_rules(tx_rule->rule); + mlx5_modify_header_dealloc(mdev, tx_rule->meta_modhdr); + list_del(&tx_rule->entry); + kfree(tx_rule); +} + +static void macsec_fs_del_roce_rules_tx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, + struct list_head *tx_rules_list) +{ + struct mlx5_roce_macsec_tx_rule *tx_rule, *next; + + if (!mlx5_is_macsec_roce_supported(macsec_fs->mdev)) + return; + + list_for_each_entry_safe(tx_rule, next, tx_rules_list, entry) { + if (tx_rule->fs_id == fs_id) + macsec_fs_del_roce_rule_tx(macsec_fs->mdev, tx_rule); + } +} + +void mlx5_macsec_fs_get_stats_fill(struct mlx5_macsec_fs *macsec_fs, void *macsec_stats) +{ + struct mlx5_macsec_stats *stats = (struct mlx5_macsec_stats *)macsec_stats; + struct mlx5_macsec_tables *tx_tables = &macsec_fs->tx_fs->tables; + struct mlx5_macsec_tables *rx_tables = &macsec_fs->rx_fs->tables; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + + if (tx_tables->check_rule_counter) + mlx5_fc_query(mdev, tx_tables->check_rule_counter, + &stats->macsec_tx_pkts, &stats->macsec_tx_bytes); + + if (tx_tables->check_miss_rule_counter) + mlx5_fc_query(mdev, tx_tables->check_miss_rule_counter, + &stats->macsec_tx_pkts_drop, &stats->macsec_tx_bytes_drop); + + if (rx_tables->check_rule_counter) + mlx5_fc_query(mdev, rx_tables->check_rule_counter, + &stats->macsec_rx_pkts, &stats->macsec_rx_bytes); + + if (rx_tables->check_miss_rule_counter) + mlx5_fc_query(mdev, rx_tables->check_miss_rule_counter, + &stats->macsec_rx_pkts_drop, &stats->macsec_rx_bytes_drop); +} + +struct mlx5_macsec_stats *mlx5_macsec_fs_get_stats(struct mlx5_macsec_fs *macsec_fs) +{ + if (!macsec_fs) + return NULL; + + return &macsec_fs->stats; +} + +u32 mlx5_macsec_fs_get_fs_id_from_hashtable(struct mlx5_macsec_fs *macsec_fs, sci_t *sci) +{ + struct mlx5_fs_id *mlx5_fs_id; + u32 fs_id = 0; + + rcu_read_lock(); + mlx5_fs_id = rhashtable_lookup(&macsec_fs->sci_hash, sci, rhash_sci); + if (mlx5_fs_id) + fs_id = mlx5_fs_id->id; + rcu_read_unlock(); + + return fs_id; +} + +union mlx5_macsec_rule * +mlx5_macsec_fs_add_rule(struct mlx5_macsec_fs *macsec_fs, + const struct macsec_context *macsec_ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id) +{ + struct mlx5_macsec_event_data data = {.macsec_fs = macsec_fs, + .macdev = macsec_ctx->secy->netdev, + .is_tx = + (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) + }; + union mlx5_macsec_rule *macsec_rule; + u32 tx_new_fs_id; + + macsec_rule = (attrs->action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? + macsec_fs_tx_add_rule(macsec_fs, macsec_ctx, attrs, &tx_new_fs_id) : + macsec_fs_rx_add_rule(macsec_fs, macsec_ctx, attrs, *sa_fs_id); + + data.fs_id = (data.is_tx) ? tx_new_fs_id : *sa_fs_id; + if (macsec_rule) + blocking_notifier_call_chain(&macsec_fs->mdev->macsec_nh, + MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, + &data); + + return macsec_rule; +} + +void mlx5_macsec_fs_del_rule(struct mlx5_macsec_fs *macsec_fs, + union mlx5_macsec_rule *macsec_rule, + int action, void *macdev, u32 sa_fs_id) +{ + struct mlx5_macsec_event_data data = {.macsec_fs = macsec_fs, + .macdev = macdev, + .is_tx = (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) + }; + + data.fs_id = (data.is_tx) ? macsec_rule->tx_rule.fs_id : sa_fs_id; + blocking_notifier_call_chain(&macsec_fs->mdev->macsec_nh, + MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + &data); + + (action == MLX5_ACCEL_MACSEC_ACTION_ENCRYPT) ? + macsec_fs_tx_del_rule(macsec_fs, &macsec_rule->tx_rule, macdev) : + macsec_fs_rx_del_rule(macsec_fs, &macsec_rule->rx_rule, macdev, sa_fs_id); +} + +static int mlx5_macsec_fs_add_roce_rule_rx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, u16 gid_idx, + const struct sockaddr *addr, + struct list_head *rx_rules_list) +{ + struct mlx5_macsec_rx *rx_fs = macsec_fs->rx_fs; + struct mlx5_roce_macsec_rx_rule *rx_rule; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *new_rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + rx_rule = kzalloc(sizeof(*rx_rule), GFP_KERNEL); + if (!rx_rule) { + err = -ENOMEM; + goto out; + } + + set_ipaddr_spec(addr, spec, true); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.ft = rx_fs->roce.ft_macsec_op_check; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + new_rule = mlx5_add_flow_rules(rx_fs->roce.ft_ip_check, spec, &flow_act, + &dest, 1); + if (IS_ERR(new_rule)) { + err = PTR_ERR(new_rule); + goto ip_rule_err; + } + rx_rule->ip = new_rule; + + memset(&flow_act, 0, sizeof(flow_act)); + memset(spec, 0, sizeof(*spec)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_5); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_5, + macsec_fs_set_rx_fs_id(fs_id)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + new_rule = mlx5_add_flow_rules(rx_fs->roce.ft_macsec_op_check, spec, &flow_act, + NULL, 0); + if (IS_ERR(new_rule)) { + err = PTR_ERR(new_rule); + goto op_rule_err; + } + rx_rule->op = new_rule; + rx_rule->gid_idx = gid_idx; + rx_rule->fs_id = fs_id; + list_add_tail(&rx_rule->entry, rx_rules_list); + + goto out; + +op_rule_err: + mlx5_del_flow_rules(rx_rule->ip); + rx_rule->ip = NULL; +ip_rule_err: + kfree(rx_rule); +out: + kvfree(spec); + return err; +} + +static int mlx5_macsec_fs_add_roce_rule_tx(struct mlx5_macsec_fs *macsec_fs, u32 fs_id, u16 gid_idx, + const struct sockaddr *addr, + struct list_head *tx_rules_list) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_macsec_tx *tx_fs = macsec_fs->tx_fs; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_modify_hdr *modify_hdr = NULL; + struct mlx5_roce_macsec_tx_rule *tx_rule; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *new_rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + tx_rule = kzalloc(sizeof(*tx_rule), GFP_KERNEL); + if (!tx_rule) { + err = -ENOMEM; + goto out; + } + + set_ipaddr_spec(addr, spec, false); + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_A); + MLX5_SET(set_action_in, action, data, macsec_fs_set_tx_fs_id(fs_id)); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + mlx5_core_err(mdev, "Fail to alloc ROCE MACsec set modify_header_id err=%d\n", + err); + modify_hdr = NULL; + goto modify_hdr_err; + } + tx_rule->meta_modhdr = modify_hdr; + + flow_act.modify_hdr = modify_hdr; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dest.ft = tx_fs->tables.ft_crypto.t; + new_rule = mlx5_add_flow_rules(tx_fs->ft_rdma_tx, spec, &flow_act, &dest, 1); + if (IS_ERR(new_rule)) { + err = PTR_ERR(new_rule); + mlx5_core_err(mdev, "Failed to add ROCE TX rule, err=%d\n", err); + goto rule_err; + } + tx_rule->rule = new_rule; + tx_rule->gid_idx = gid_idx; + tx_rule->fs_id = fs_id; + list_add_tail(&tx_rule->entry, tx_rules_list); + + goto out; + +rule_err: + mlx5_modify_header_dealloc(mdev, tx_rule->meta_modhdr); +modify_hdr_err: + kfree(tx_rule); +out: + kvfree(spec); + return err; +} + +void mlx5_macsec_del_roce_rule(u16 gid_idx, struct mlx5_macsec_fs *macsec_fs, + struct list_head *tx_rules_list, struct list_head *rx_rules_list) +{ + struct mlx5_roce_macsec_rx_rule *rx_rule, *next_rx; + struct mlx5_roce_macsec_tx_rule *tx_rule, *next_tx; + + list_for_each_entry_safe(tx_rule, next_tx, tx_rules_list, entry) { + if (tx_rule->gid_idx == gid_idx) + macsec_fs_del_roce_rule_tx(macsec_fs->mdev, tx_rule); + } + + list_for_each_entry_safe(rx_rule, next_rx, rx_rules_list, entry) { + if (rx_rule->gid_idx == gid_idx) + macsec_fs_del_roce_rule_rx(rx_rule); + } +} +EXPORT_SYMBOL_GPL(mlx5_macsec_del_roce_rule); + +int mlx5_macsec_add_roce_rule(void *macdev, const struct sockaddr *addr, u16 gid_idx, + struct list_head *tx_rules_list, struct list_head *rx_rules_list, + struct mlx5_macsec_fs *macsec_fs) +{ + struct mlx5_macsec_device *iter, *macsec_device = NULL; + struct mlx5_core_dev *mdev = macsec_fs->mdev; + struct mlx5_fs_id *fs_id_iter; + unsigned long index = 0; + int err; + + list_for_each_entry(iter, &macsec_fs->macsec_devices_list, macsec_devices_list_entry) { + if (iter->macdev == macdev) { + macsec_device = iter; + break; + } + } + + if (!macsec_device) + return 0; + + xa_for_each(&macsec_device->tx_id_xa, index, fs_id_iter) { + err = mlx5_macsec_fs_add_roce_rule_tx(macsec_fs, fs_id_iter->id, gid_idx, addr, + tx_rules_list); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to add roce TX rule\n"); + goto out; + } + } + + index = 0; + xa_for_each(&macsec_device->rx_id_xa, index, fs_id_iter) { + err = mlx5_macsec_fs_add_roce_rule_rx(macsec_fs, fs_id_iter->id, gid_idx, addr, + rx_rules_list); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to add roce TX rule\n"); + goto out; + } + } + + return 0; +out: + mlx5_macsec_del_roce_rule(gid_idx, macsec_fs, tx_rules_list, rx_rules_list); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_macsec_add_roce_rule); + +void mlx5_macsec_add_roce_sa_rules(u32 fs_id, const struct sockaddr *addr, u16 gid_idx, + struct list_head *tx_rules_list, + struct list_head *rx_rules_list, + struct mlx5_macsec_fs *macsec_fs, bool is_tx) +{ + (is_tx) ? + mlx5_macsec_fs_add_roce_rule_tx(macsec_fs, fs_id, gid_idx, addr, + tx_rules_list) : + mlx5_macsec_fs_add_roce_rule_rx(macsec_fs, fs_id, gid_idx, addr, + rx_rules_list); +} +EXPORT_SYMBOL_GPL(mlx5_macsec_add_roce_sa_rules); + +void mlx5_macsec_del_roce_sa_rules(u32 fs_id, struct mlx5_macsec_fs *macsec_fs, + struct list_head *tx_rules_list, + struct list_head *rx_rules_list, bool is_tx) +{ + (is_tx) ? + macsec_fs_del_roce_rules_tx(macsec_fs, fs_id, tx_rules_list) : + macsec_fs_del_roce_rules_rx(macsec_fs, fs_id, rx_rules_list); +} +EXPORT_SYMBOL_GPL(mlx5_macsec_del_roce_sa_rules); + +void mlx5_macsec_fs_cleanup(struct mlx5_macsec_fs *macsec_fs) +{ + macsec_fs_rx_cleanup(macsec_fs); + macsec_fs_tx_cleanup(macsec_fs); + rhashtable_destroy(&macsec_fs->fs_id_hash); + rhashtable_destroy(&macsec_fs->sci_hash); + kfree(macsec_fs); +} + +struct mlx5_macsec_fs * +mlx5_macsec_fs_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_macsec_fs *macsec_fs; + int err; + + macsec_fs = kzalloc(sizeof(*macsec_fs), GFP_KERNEL); + if (!macsec_fs) + return NULL; + + macsec_fs->mdev = mdev; + + err = rhashtable_init(&macsec_fs->sci_hash, &rhash_sci); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init SCI hash table, err=%d\n", + err); + goto err_hash; + } + + err = rhashtable_init(&macsec_fs->fs_id_hash, &rhash_fs_id); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init FS_ID hash table, err=%d\n", + err); + goto sci_hash_cleanup; + } + + err = macsec_fs_tx_init(macsec_fs); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err); + goto fs_id_hash_cleanup; + } + + err = macsec_fs_rx_init(macsec_fs); + if (err) { + mlx5_core_err(mdev, "MACsec offload: Failed to init tx_fs, err=%d\n", err); + goto tx_cleanup; + } + + BLOCKING_INIT_NOTIFIER_HEAD(&mdev->macsec_nh); + + return macsec_fs; + +tx_cleanup: + macsec_fs_tx_cleanup(macsec_fs); +fs_id_hash_cleanup: + rhashtable_destroy(&macsec_fs->fs_id_hash); +sci_hash_cleanup: + rhashtable_destroy(&macsec_fs->sci_hash); +err_hash: + kfree(macsec_fs); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h new file mode 100644 index 0000000000..34b80c3ef6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_MACSEC_STEERING_H__ +#define __MLX5_MACSEC_STEERING_H__ + +#ifdef CONFIG_MLX5_MACSEC + +/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ +#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ +#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX +#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) +#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) + +#define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16 + +struct mlx5_macsec_fs; +union mlx5_macsec_rule; + +struct mlx5_macsec_rule_attrs { + sci_t sci; + u32 macsec_obj_id; + u8 assoc_num; + int action; +}; + +struct mlx5_macsec_stats { + u64 macsec_rx_pkts; + u64 macsec_rx_bytes; + u64 macsec_rx_pkts_drop; + u64 macsec_rx_bytes_drop; + u64 macsec_tx_pkts; + u64 macsec_tx_bytes; + u64 macsec_tx_pkts_drop; + u64 macsec_tx_bytes_drop; +}; + +enum mlx5_macsec_action { + MLX5_ACCEL_MACSEC_ACTION_ENCRYPT, + MLX5_ACCEL_MACSEC_ACTION_DECRYPT, +}; + +void mlx5_macsec_fs_cleanup(struct mlx5_macsec_fs *macsec_fs); + +struct mlx5_macsec_fs * +mlx5_macsec_fs_init(struct mlx5_core_dev *mdev); + +union mlx5_macsec_rule * +mlx5_macsec_fs_add_rule(struct mlx5_macsec_fs *macsec_fs, + const struct macsec_context *ctx, + struct mlx5_macsec_rule_attrs *attrs, + u32 *sa_fs_id); + +void mlx5_macsec_fs_del_rule(struct mlx5_macsec_fs *macsec_fs, + union mlx5_macsec_rule *macsec_rule, + int action, void *macdev, u32 sa_fs_id); + +void mlx5_macsec_fs_get_stats_fill(struct mlx5_macsec_fs *macsec_fs, void *macsec_stats); +struct mlx5_macsec_stats *mlx5_macsec_fs_get_stats(struct mlx5_macsec_fs *macsec_fs); +u32 mlx5_macsec_fs_get_fs_id_from_hashtable(struct mlx5_macsec_fs *macsec_fs, sci_t *sci); + +#endif + +#endif /* __MLX5_MACSEC_STEERING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h new file mode 100644 index 0000000000..2b5826a785 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_MLX5_H__ +#define __LIB_MLX5_H__ + +#include "mlx5_core.h" + +void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); +void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); +int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); +void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); +int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); +void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +int mlx5_crdump_enable(struct mlx5_core_dev *dev); +void mlx5_crdump_disable(struct mlx5_core_dev *dev); +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); + +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} + +static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev) +{ + return mdev->mlx5e_res.uplink_netdev; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c new file mode 100644 index 0000000000..4450091e18 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "lib/mpfs.h" + +/* HW L2 Table (MPFS) management */ +static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {}; + u8 *in_mac_addr; + + MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_in(dev, set_l2_table_entry, in); +} + +static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {}; + + MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_in(dev, delete_l2_table_entry, in); +} + +/* UC L2 table hash node */ +struct l2table_node { + struct l2addr_node node; + u32 index; /* index in HW l2 table */ + int ref_count; +}; + +struct mlx5_mpfs { + struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE]; + struct mutex lock; /* Synchronize l2 table access */ + u32 size; + unsigned long *bitmap; +}; + +static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2table->bitmap, l2table->size); + if (*ix >= l2table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2table->bitmap); + + return err; +} + +static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix) +{ + __clear_bit(ix, l2table->bitmap); +} + +int mlx5_mpfs_init(struct mlx5_core_dev *dev) +{ + int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + struct mlx5_mpfs *mpfs; + + if (!MLX5_ESWITCH_MANAGER(dev) || l2table_size == 1) + return 0; + + mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); + if (!mpfs) + return -ENOMEM; + + mutex_init(&mpfs->lock); + mpfs->size = l2table_size; + mpfs->bitmap = bitmap_zalloc(l2table_size, GFP_KERNEL); + if (!mpfs->bitmap) { + kfree(mpfs); + return -ENOMEM; + } + + dev->priv.mpfs = mpfs; + return 0; +} + +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + + if (!mpfs) + return; + + WARN_ON(!hlist_empty(mpfs->hash)); + bitmap_free(mpfs->bitmap); + kfree(mpfs); +} + +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + int err = 0; + u32 index; + + if (!mpfs) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (l2addr) { + l2addr->ref_count++; + goto out; + } + + err = alloc_l2table_index(mpfs, &index); + if (err) + goto out; + + l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); + if (!l2addr) { + err = -ENOMEM; + goto hash_add_err; + } + + err = set_l2table_entry_cmd(dev, index, mac); + if (err) + goto set_table_entry_err; + + l2addr->index = index; + l2addr->ref_count = 1; + + mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); + goto out; + +set_table_entry_err: + l2addr_hash_del(l2addr); +hash_add_err: + free_l2table_index(mpfs, index); +out: + mutex_unlock(&mpfs->lock); + return err; +} +EXPORT_SYMBOL(mlx5_mpfs_add_mac); + +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + int err = 0; + u32 index; + + if (!mpfs) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (!l2addr) { + err = -ENOENT; + goto unlock; + } + + if (--l2addr->ref_count > 0) + goto unlock; + + index = l2addr->index; + del_l2table_entry_cmd(dev, index); + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index); +unlock: + mutex_unlock(&mpfs->lock); + return err; +} +EXPORT_SYMBOL(mlx5_mpfs_del_mac); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h new file mode 100644 index 0000000000..4a293542a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_MPFS_H__ +#define __MLX5_MPFS_H__ + +#include +#include + +/* L2 -mac address based- hash helpers */ +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&(ptr)->node.hlist); \ + kfree(ptr); \ +}) + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_init(struct mlx5_core_dev *dev); +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c new file mode 100644 index 0000000000..6b774e0c27 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include +#include "mlx5_core.h" +#include "pci_vsc.h" + +#define MLX5_EXTRACT_C(source, offset, size) \ + ((((u32)(source)) >> (offset)) & MLX5_ONES32(size)) +#define MLX5_EXTRACT(src, start, len) \ + (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len)) +#define MLX5_ONES32(size) \ + ((size) ? (0xffffffff >> (32 - (size))) : 0) +#define MLX5_MASK32(offset, size) \ + (MLX5_ONES32(size) << (offset)) +#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \ + ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \ + ((rsrc1) & (~MLX5_MASK32((start), (len))))) +#define MLX5_MERGE(rsrc1, rsrc2, start, len) \ + (((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len)) +#define vsc_read(dev, offset, val) \ + pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define vsc_write(dev, offset, val) \ + pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define VSC_MAX_RETRIES 2048 + +enum { + VSC_CTRL_OFFSET = 0x4, + VSC_COUNTER_OFFSET = 0x8, + VSC_SEMAPHORE_OFFSET = 0xc, + VSC_ADDR_OFFSET = 0x10, + VSC_DATA_OFFSET = 0x14, + + VSC_FLAG_BIT_OFFS = 31, + VSC_FLAG_BIT_LEN = 1, + + VSC_SYND_BIT_OFFS = 30, + VSC_SYND_BIT_LEN = 1, + + VSC_ADDR_BIT_OFFS = 0, + VSC_ADDR_BIT_LEN = 30, + + VSC_SPACE_BIT_OFFS = 0, + VSC_SPACE_BIT_LEN = 16, + + VSC_SIZE_VLD_BIT_OFFS = 28, + VSC_SIZE_VLD_BIT_LEN = 1, + + VSC_STATUS_BIT_OFFS = 29, + VSC_STATUS_BIT_LEN = 3, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + dev->vsc_addr = pci_find_capability(dev->pdev, + PCI_CAP_ID_VNDR); + if (!dev->vsc_addr) + mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n"); +} + +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) +{ + u32 counter = 0; + int retries = 0; + u32 lock_val; + int ret; + + pci_cfg_access_lock(dev->pdev); + do { + if (retries > VSC_MAX_RETRIES) { + ret = -EBUSY; + goto pci_unlock; + } + + /* Check if semaphore is already locked */ + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + if (lock_val) { + retries++; + usleep_range(1000, 2000); + continue; + } + + /* Read and write counter value, if written value is + * the same, semaphore was acquired successfully. + */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter); + if (ret) + goto pci_unlock; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter); + if (ret) + goto pci_unlock; + + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + retries++; + } while (counter != lock_val); + + return 0; + +pci_unlock: + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev) +{ + int ret; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK); + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size) +{ + int ret; + u32 val = 0; + + if (!mlx5_vsc_accessible(dev)) + return -EINVAL; + + if (ret_space_size) + *ret_space_size = 0; + + /* Get a unique val */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + /* Try to modify the lock */ + val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN); + ret = vsc_write(dev, VSC_CTRL_OFFSET, val); + if (ret) + goto out; + + /* Verify lock was modified */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0) + return -EINVAL; + + /* Get space max address if indicated by size valid bit */ + if (ret_space_size && + MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) { + ret = vsc_read(dev, VSC_ADDR_OFFSET, &val); + if (ret) { + mlx5_core_warn(dev, "Failed to get max space size\n"); + goto out; + } + *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + } + return 0; + +out: + return ret; +} + +static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val) +{ + int retries = 0; + u32 flag; + int ret; + + do { + if (retries > VSC_MAX_RETRIES) + return -EBUSY; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag); + if (ret) + return ret; + flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN); + retries++; + + if ((retries & 0xf) == 0) + usleep_range(1000, 2000); + + } while (flag != expected_val); + + return 0; +} + +static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address, + u32 data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + /* Set flag to 0x1 */ + address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1); + ret = vsc_write(dev, VSC_DATA_OFFSET, data); + if (ret) + goto out; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + /* Wait for the flag to be cleared */ + ret = mlx5_vsc_wait_on_flag(dev, 0); + +out: + return ret; +} + +static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address, + u32 *data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + ret = mlx5_vsc_wait_on_flag(dev, 1); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_DATA_OFFSET, data); +out: + return ret; +} + +static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev, + unsigned int read_addr, + unsigned int *next_read_addr, + u32 *data) +{ + int ret; + + ret = mlx5_vsc_gw_read(dev, read_addr, data); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr); + if (ret) + goto out; + + *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + + if (*next_read_addr <= read_addr) + ret = -EINVAL; +out: + return ret; +} + +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length) +{ + unsigned int next_read_addr = 0; + unsigned int read_addr = 0; + + while (read_addr < length) { + if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, + &data[(read_addr >> 2)])) + return read_addr; + + read_addr = next_read_addr; + } + return length; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state) +{ + u32 data, id = 0; + int ret; + + ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL); + if (ret) { + mlx5_core_warn(dev, "Failed to set gw space %d\n", ret); + return ret; + } + + if (state == MLX5_VSC_LOCK) { + /* Get a unique ID based on the counter */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id); + if (ret) + return ret; + } + + /* Try to modify lock */ + ret = mlx5_vsc_gw_write(dev, space, id); + if (ret) + return ret; + + /* Verify lock was modified */ + ret = mlx5_vsc_gw_read(dev, space, &data); + if (ret) + return -EINVAL; + + if (data != id) + return -EBUSY; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h new file mode 100644 index 0000000000..64272a6d77 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_PCI_VSC_H__ +#define __MLX5_PCI_VSC_H__ + +enum mlx5_vsc_state { + MLX5_VSC_UNLOCK, + MLX5_VSC_LOCK, +}; + +enum { + MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size); +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length); + +static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev) +{ + return !!dev->vsc_addr; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state); + +#endif /* __MLX5_PCI_VSC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c new file mode 100644 index 0000000000..4571c56ec3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include +#include "mlx5_core.h" +#include "lib/port_tun.h" + +struct mlx5_port_tun_entropy_flags { + bool force_supported, force_enabled; + bool calc_supported, calc_enabled; + bool gre_calc_supported, gre_calc_enabled; +}; + +static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev, + struct mlx5_port_tun_entropy_flags *entropy_flags) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + entropy_flags->force_supported = false; + entropy_flags->calc_supported = false; + entropy_flags->gre_calc_supported = false; + entropy_flags->force_enabled = false; + entropy_flags->calc_enabled = true; + entropy_flags->gre_calc_enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap)); + entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap)); + entropy_flags->gre_calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap)); + entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force)); + entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc)); + entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc)); +} + +static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable, + u8 force) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, entropy_force, force); + MLX5_SET(pcmr_reg, in, entropy_calc, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev, + u8 enable, u8 force) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, entropy_force, force); + MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, + struct mlx5_core_dev *mdev) +{ + struct mlx5_port_tun_entropy_flags entropy_flags; + + tun_entropy->mdev = mdev; + mutex_init(&tun_entropy->lock); + mlx5_query_port_tun_entropy(mdev, &entropy_flags); + tun_entropy->num_enabling_entries = 0; + tun_entropy->num_disabling_entries = 0; + tun_entropy->enabled = entropy_flags.calc_supported ? + entropy_flags.calc_enabled : true; +} + +static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, + int reformat_type, bool enable) +{ + struct mlx5_port_tun_entropy_flags entropy_flags; + int err; + + mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags); + /* Tunnel entropy calculation may be controlled either on port basis + * for all tunneling protocols or specifically for GRE protocol. + * Prioritize GRE protocol control (if capable) over global port + * configuration. + */ + if (entropy_flags.gre_calc_supported && + reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { + if (!entropy_flags.force_supported) + return 0; + err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, + enable, !enable); + if (err) + return err; + } else if (entropy_flags.calc_supported) { + /* Other applications may change the global FW entropy + * calculations settings. Check that the current entropy value + * is the negative of the updated value. + */ + if (entropy_flags.force_enabled && + enable == entropy_flags.calc_enabled) { + mlx5_core_warn(tun_entropy->mdev, + "Unexpected entropy calc setting - expected %d", + !entropy_flags.calc_enabled); + return -EOPNOTSUPP; + } + /* GRE requires disabling entropy calculation. if there are + * enabling entries (i.e VXLAN) we cannot turn it off for them, + * thus fail. + */ + if (tun_entropy->num_enabling_entries) + return -EOPNOTSUPP; + err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable, + entropy_flags.force_supported); + if (err) + return err; + tun_entropy->enabled = enable; + /* if we turn on the entropy we don't need to force it anymore */ + if (entropy_flags.force_supported && enable) { + err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0); + if (err) + return err; + } + } + + return 0; +} + +/* the function manages the refcount for enabling/disabling tunnel types. + * the return value indicates if the inc is successful or not, depending on + * entropy capabilities and configuration. + */ +int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, + int reformat_type) +{ + int err = -EOPNOTSUPP; + + mutex_lock(&tun_entropy->lock); + if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN || + reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) && + tun_entropy->enabled) { + /* in case entropy calculation is enabled for all tunneling + * types, it is ok for VXLAN, so approve. + * otherwise keep the error default. + */ + tun_entropy->num_enabling_entries++; + err = 0; + } else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { + /* turn off the entropy only for the first GRE rule. + * for the next rules the entropy was already disabled + * successfully. + */ + if (tun_entropy->num_disabling_entries == 0) + err = mlx5_set_entropy(tun_entropy, reformat_type, 0); + else + err = 0; + if (!err) + tun_entropy->num_disabling_entries++; + } + mutex_unlock(&tun_entropy->lock); + + return err; +} + +void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy, + int reformat_type) +{ + mutex_lock(&tun_entropy->lock); + if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN) + tun_entropy->num_enabling_entries--; + else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE && + --tun_entropy->num_disabling_entries == 0) + mlx5_set_entropy(tun_entropy, reformat_type, 1); + mutex_unlock(&tun_entropy->lock); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h new file mode 100644 index 0000000000..54c42a8870 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_PORT_TUN_H__ +#define __MLX5_PORT_TUN_H__ + +#include + +struct mlx5_tun_entropy { + struct mlx5_core_dev *mdev; + u32 num_enabling_entries; + u32 num_disabling_entries; + u8 enabled; + struct mutex lock; /* lock the entropy fields */ +}; + +void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, + struct mlx5_core_dev *mdev); +int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, + int reformat_type); +void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy, + int reformat_type); + +#endif /* __MLX5_PORT_TUN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h new file mode 100644 index 0000000000..84e5683861 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#ifndef __LIB_MLX5_SF_H__ +#define __LIB_MLX5_SF_H__ + +#include + +static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf_base_id); +} + +#ifdef CONFIG_MLX5_SF + +static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf); +} + +static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev) +{ + if (!mlx5_sf_supported(dev)) + return 0; + if (MLX5_CAP_GEN(dev, max_num_sf)) + return MLX5_CAP_GEN(dev, max_num_sf); + else + return 1 << MLX5_CAP_GEN(dev, log_max_sf); +} + +#else + +static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) +{ + return false; +} + +static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev) +{ + return 0; +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c new file mode 100644 index 0000000000..9b8c051ccf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#include +#include + +#include "smfs.h" + +struct mlx5dr_matcher * +mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec) +{ + struct mlx5dr_match_parameters matcher_mask = {}; + + matcher_mask.match_buf = (u64 *)&spec->match_criteria; + matcher_mask.match_sz = DR_SZ_MATCH_PARAM; + + return mlx5dr_matcher_create(table, priority, spec->match_criteria_enable, &matcher_mask); +} + +void +mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher) +{ + mlx5dr_matcher_destroy(matcher); +} + +struct mlx5dr_table * +mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft) +{ + return mlx5dr_table_get_from_fs_ft(ft); +} + +struct mlx5dr_action * +mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table) +{ + return mlx5dr_action_create_dest_table(table); +} + +struct mlx5dr_action * +mlx5_smfs_action_create_flow_counter(u32 counter_id) +{ + return mlx5dr_action_create_flow_counter(counter_id); +} + +void +mlx5_smfs_action_destroy(struct mlx5dr_action *action) +{ + mlx5dr_action_destroy(action); +} + +struct mlx5dr_rule * +mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec, + size_t num_actions, struct mlx5dr_action *actions[], + u32 flow_source) +{ + struct mlx5dr_match_parameters value = {}; + + value.match_buf = (u64 *)spec->match_value; + value.match_sz = DR_SZ_MATCH_PARAM; + + return mlx5dr_rule_create(matcher, &value, num_actions, actions, flow_source); +} + +void +mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule) +{ + mlx5dr_rule_destroy(rule); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h new file mode 100644 index 0000000000..452d0df339 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/smfs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_LIB_SMFS_H__ +#define __MLX5_LIB_SMFS_H__ + +#include "steering/mlx5dr.h" +#include "steering/dr_types.h" + +struct mlx5dr_matcher * +mlx5_smfs_matcher_create(struct mlx5dr_table *table, u32 priority, struct mlx5_flow_spec *spec); + +void +mlx5_smfs_matcher_destroy(struct mlx5dr_matcher *matcher); + +struct mlx5dr_table * +mlx5_smfs_table_get_from_fs_ft(struct mlx5_flow_table *ft); + +struct mlx5dr_action * +mlx5_smfs_action_create_dest_table(struct mlx5dr_table *table); + +struct mlx5dr_action * +mlx5_smfs_action_create_flow_counter(u32 counter_id); + +void +mlx5_smfs_action_destroy(struct mlx5dr_action *action); + +struct mlx5dr_rule * +mlx5_smfs_rule_create(struct mlx5dr_matcher *matcher, struct mlx5_flow_spec *spec, + size_t num_actions, struct mlx5dr_action *actions[], + u32 flow_source); + +void +mlx5_smfs_rule_destroy(struct mlx5dr_rule *rule); + +#endif /* __MLX5_LIB_SMFS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c new file mode 100644 index 0000000000..e223e0e464 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include "lib/tout.h" + +struct mlx5_timeouts { + u64 to[MAX_TIMEOUT_TYPES]; +}; + +static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { + [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000, + [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000, + [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000, + [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2, + [MLX5_TO_FW_INIT_MS] = 2000, + [MLX5_TO_CMD_MS] = 60000, + [MLX5_TO_PCI_TOGGLE_MS] = 2000, + [MLX5_TO_HEALTH_POLL_INTERVAL_MS] = 2000, + [MLX5_TO_FULL_CRDUMP_MS] = 60000, + [MLX5_TO_FW_RESET_MS] = 60000, + [MLX5_TO_FLUSH_ON_ERROR_MS] = 2000, + [MLX5_TO_PCI_SYNC_UPDATE_MS] = 5000, + [MLX5_TO_TEARDOWN_MS] = 3000, + [MLX5_TO_FSM_REACTIVATE_MS] = 5000, + [MLX5_TO_RECLAIM_PAGES_MS] = 5000, + [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000, + [MLX5_TO_RESET_UNLOAD_MS] = 300000 +}; + +static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type) +{ + dev->timeouts->to[type] = val; +} + +int mlx5_tout_init(struct mlx5_core_dev *dev) +{ + int i; + + dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL); + if (!dev->timeouts) + return -ENOMEM; + + for (i = 0; i < MAX_TIMEOUT_TYPES; i++) + tout_set(dev, tout_def_sw_val[i], i); + + return 0; +} + +void mlx5_tout_cleanup(struct mlx5_core_dev *dev) +{ + kfree(dev->timeouts); +} + +/* Time register consists of two fields to_multiplier(time out multiplier) + * and to_value(time out value). to_value is the quantity of the time units and + * to_multiplier is the type and should be one off these four values. + * 0x0: millisecond + * 0x1: seconds + * 0x2: minutes + * 0x3: hours + * this function converts the time stored in the two register fields into + * millisecond. + */ +static u64 tout_convert_reg_field_to_ms(u32 to_mul, u32 to_val) +{ + u64 msec = to_val; + + to_mul &= 0x3; + /* convert hours/minutes/seconds to miliseconds */ + if (to_mul) + msec *= 1000 * int_pow(60, to_mul - 1); + + return msec; +} + +static u64 tout_convert_iseg_to_ms(u32 iseg_to) +{ + return tout_convert_reg_field_to_ms(iseg_to >> 29, iseg_to & 0xfffff); +} + +static bool tout_is_supported(struct mlx5_core_dev *dev) +{ + return !!ioread32be(&dev->iseg->cmd_q_init_to); +} + +void mlx5_tout_query_iseg(struct mlx5_core_dev *dev) +{ + u32 to; + + if (!tout_is_supported(dev)) + return; + + to = ioread32be(&dev->iseg->cmd_q_init_to); + tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_FW_INIT_MS); + + to = ioread32be(&dev->iseg->cmd_exec_to); + tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_CMD_MS); +} + +u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type) +{ + return dev->timeouts->to[type]; +} + +#define MLX5_TIMEOUT_QUERY(fld, reg_out) \ + ({ \ + struct mlx5_ifc_default_timeout_bits *time_field; \ + u32 to_multi, to_value; \ + u64 to_val_ms; \ + \ + time_field = MLX5_ADDR_OF(dtor_reg, reg_out, fld); \ + to_multi = MLX5_GET(default_timeout, time_field, to_multiplier); \ + to_value = MLX5_GET(default_timeout, time_field, to_value); \ + to_val_ms = tout_convert_reg_field_to_ms(to_multi, to_value); \ + to_val_ms; \ + }) + +#define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \ + ({ \ + u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \ + if (fw_to) \ + tout_set(dev, fw_to + (to_extra), to_type); \ + fw_to; \ + }) + +static int tout_query_dtor(struct mlx5_core_dev *dev) +{ + u64 pcie_toggle_to_val, tear_down_to_val; + u32 out[MLX5_ST_SZ_DW(dtor_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(dtor_reg)] = {}; + int err; + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_DTOR, 0, 0); + if (err) + return err; + + pcie_toggle_to_val = MLX5_TIMEOUT_FILL(pcie_toggle_to, out, dev, MLX5_TO_PCI_TOGGLE_MS, 0); + MLX5_TIMEOUT_FILL(fw_reset_to, out, dev, MLX5_TO_FW_RESET_MS, pcie_toggle_to_val); + + tear_down_to_val = MLX5_TIMEOUT_FILL(tear_down_to, out, dev, MLX5_TO_TEARDOWN_MS, 0); + MLX5_TIMEOUT_FILL(pci_sync_update_to, out, dev, MLX5_TO_PCI_SYNC_UPDATE_MS, + tear_down_to_val); + + MLX5_TIMEOUT_FILL(health_poll_to, out, dev, MLX5_TO_HEALTH_POLL_INTERVAL_MS, 0); + MLX5_TIMEOUT_FILL(full_crdump_to, out, dev, MLX5_TO_FULL_CRDUMP_MS, 0); + MLX5_TIMEOUT_FILL(flush_on_err_to, out, dev, MLX5_TO_FLUSH_ON_ERROR_MS, 0); + MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0); + MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0); + MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0); + MLX5_TIMEOUT_FILL(reset_unload_to, out, dev, MLX5_TO_RESET_UNLOAD_MS, 0); + + return 0; +} + +int mlx5_tout_query_dtor(struct mlx5_core_dev *dev) +{ + if (tout_is_supported(dev)) + return tout_query_dtor(dev); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h new file mode 100644 index 0000000000..99e0a05526 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef MLX5_TIMEOUTS_H +#define MLX5_TIMEOUTS_H + +enum mlx5_timeouts_types { + /* pre init timeouts (not read from FW) */ + MLX5_TO_FW_PRE_INIT_TIMEOUT_MS, + MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS, + MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS, + MLX5_TO_FW_PRE_INIT_WAIT_MS, + + /* init segment timeouts */ + MLX5_TO_FW_INIT_MS, + MLX5_TO_CMD_MS, + + /* DTOR timeouts */ + MLX5_TO_PCI_TOGGLE_MS, + MLX5_TO_HEALTH_POLL_INTERVAL_MS, + MLX5_TO_FULL_CRDUMP_MS, + MLX5_TO_FW_RESET_MS, + MLX5_TO_FLUSH_ON_ERROR_MS, + MLX5_TO_PCI_SYNC_UPDATE_MS, + MLX5_TO_TEARDOWN_MS, + MLX5_TO_FSM_REACTIVATE_MS, + MLX5_TO_RECLAIM_PAGES_MS, + MLX5_TO_RECLAIM_VFS_PAGES_MS, + MLX5_TO_RESET_UNLOAD_MS, + + MAX_TIMEOUT_TYPES +}; + +struct mlx5_core_dev; +int mlx5_tout_init(struct mlx5_core_dev *dev); +void mlx5_tout_cleanup(struct mlx5_core_dev *dev); +void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); +int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); +u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); + +#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) + +# endif /* MLX5_TIMEOUTS_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c new file mode 100644 index 0000000000..d55e15c1f3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" +#include "vxlan.h" + +struct mlx5_vxlan { + struct mlx5_core_dev *mdev; + /* max_num_ports is usually 4, 16 buckets is more than enough */ + DECLARE_HASHTABLE(htable, 4); + struct mutex sync_lock; /* sync add/del port HW operations */ +}; + +struct mlx5_vxlan_port { + struct hlist_node hlist; + u16 udp_port; +}; + +static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port) +{ + u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {}; + + MLX5_SET(add_vxlan_udp_dport_in, in, opcode, + MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT); + MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port); + return mlx5_cmd_exec_in(mdev, add_vxlan_udp_dport, in); +} + +static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) +{ + u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {}; + + MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port); + return mlx5_cmd_exec_in(mdev, delete_vxlan_udp_dport, in); +} + +bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + bool found = false; + + if (!mlx5_vxlan_allowed(vxlan)) + return NULL; + + rcu_read_lock(); + hash_for_each_possible_rcu(vxlan->htable, vxlanp, hlist, port) + if (vxlanp->udp_port == port) { + found = true; + break; + } + rcu_read_unlock(); + + return found; +} + +static struct mlx5_vxlan_port *vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + + hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) + if (vxlanp->udp_port == port) + return vxlanp; + return NULL; +} + +int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + int ret; + + vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL); + if (!vxlanp) + return -ENOMEM; + vxlanp->udp_port = port; + + ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port); + if (ret) { + kfree(vxlanp); + return ret; + } + + mutex_lock(&vxlan->sync_lock); + hash_add_rcu(vxlan->htable, &vxlanp->hlist, port); + mutex_unlock(&vxlan->sync_lock); + + return 0; +} + +int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) +{ + struct mlx5_vxlan_port *vxlanp; + int ret = 0; + + mutex_lock(&vxlan->sync_lock); + + vxlanp = vxlan_lookup_port(vxlan, port); + if (WARN_ON(!vxlanp)) { + ret = -ENOENT; + goto out_unlock; + } + + hash_del_rcu(&vxlanp->hlist); + synchronize_rcu(); + mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port); + kfree(vxlanp); + +out_unlock: + mutex_unlock(&vxlan->sync_lock); + return ret; +} + +struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_vxlan *vxlan; + + if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev)) + return ERR_PTR(-ENOTSUPP); + + vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL); + if (!vxlan) + return ERR_PTR(-ENOMEM); + + vxlan->mdev = mdev; + mutex_init(&vxlan->sync_lock); + hash_init(vxlan->htable); + + /* Hardware adds 4789 (IANA_VXLAN_UDP_PORT) by default */ + mlx5_vxlan_add_port(vxlan, IANA_VXLAN_UDP_PORT); + + return vxlan; +} + +void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) +{ + if (!mlx5_vxlan_allowed(vxlan)) + return; + + mlx5_vxlan_del_port(vxlan, IANA_VXLAN_UDP_PORT); + WARN_ON(!hash_empty(vxlan->htable)); + + kfree(vxlan); +} + +void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) +{ + struct mlx5_vxlan_port *vxlanp; + struct hlist_node *tmp; + int bkt; + + if (!mlx5_vxlan_allowed(vxlan)) + return; + + hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) { + /* Don't delete default UDP port added by the HW. + * Remove only user configured ports + */ + if (vxlanp->udp_port == IANA_VXLAN_UDP_PORT) + continue; + mlx5_vxlan_del_port(vxlan, vxlanp->udp_port); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h new file mode 100644 index 0000000000..34ef662da3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_VXLAN_H__ +#define __MLX5_VXLAN_H__ + +#include + +struct mlx5_vxlan; +struct mlx5_vxlan_port; + +static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4; +} + +static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan) +{ + /* not allowed reason is encoded in vxlan pointer as error, + * on mlx5_vxlan_create + */ + return !IS_ERR_OR_NULL(vxlan); +} + +#if IS_ENABLED(CONFIG_VXLAN) +struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev); +void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan); +int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port); +int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port); +bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port); +void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan); +#else +static inline struct mlx5_vxlan* +mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); } +static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; } +static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; } +static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; } +static inline bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return false; } +static inline void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { return; } +#endif + +#endif /* __MLX5_VXLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c new file mode 100644 index 0000000000..6ca91c0e8a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -0,0 +1,2340 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "lib/eq.h" +#include "fs_core.h" +#include "lib/mpfs.h" +#include "eswitch.h" +#include "devlink.h" +#include "fw_reset.h" +#include "lib/mlx5.h" +#include "lib/tout.h" +#include "fpga/core.h" +#include "en_accel/ipsec.h" +#include "lib/clock.h" +#include "lib/vxlan.h" +#include "lib/geneve.h" +#include "lib/devcom.h" +#include "lib/pci_vsc.h" +#include "diag/fw_tracer.h" +#include "ecpf.h" +#include "lib/hv_vhca.h" +#include "diag/rsc_dump.h" +#include "sf/vhca_event.h" +#include "sf/dev/dev.h" +#include "sf/sf.h" +#include "mlx5_irq.h" +#include "hwmon.h" + +MODULE_AUTHOR("Eli Cohen "); +MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +unsigned int mlx5_core_debug_mask; +module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); +MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); + +static unsigned int prof_sel = MLX5_DEFAULT_PROF; +module_param_named(prof_sel, prof_sel, uint, 0444); +MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + +static u32 sw_owner_id[4]; +#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1) +static DEFINE_IDA(sw_vhca_ida); + +enum { + MLX5_ATOMIC_REQ_MODE_BE = 0x0, + MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, +}; + +#define LOG_MAX_SUPPORTED_QPS 0xff + +static struct mlx5_profile profile[] = { + [0] = { + .mask = 0, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, + }, + [1] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = 12, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, + + }, + [2] = { + .mask = MLX5_PROF_MASK_QP_SIZE | + MLX5_PROF_MASK_MR_CACHE, + .log_max_qp = LOG_MAX_SUPPORTED_QPS, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, + .mr_cache[0] = { + .size = 500, + .limit = 250 + }, + .mr_cache[1] = { + .size = 500, + .limit = 250 + }, + .mr_cache[2] = { + .size = 500, + .limit = 250 + }, + .mr_cache[3] = { + .size = 500, + .limit = 250 + }, + .mr_cache[4] = { + .size = 500, + .limit = 250 + }, + .mr_cache[5] = { + .size = 500, + .limit = 250 + }, + .mr_cache[6] = { + .size = 500, + .limit = 250 + }, + .mr_cache[7] = { + .size = 500, + .limit = 250 + }, + .mr_cache[8] = { + .size = 500, + .limit = 250 + }, + .mr_cache[9] = { + .size = 500, + .limit = 250 + }, + .mr_cache[10] = { + .size = 500, + .limit = 250 + }, + .mr_cache[11] = { + .size = 500, + .limit = 250 + }, + .mr_cache[12] = { + .size = 64, + .limit = 32 + }, + .mr_cache[13] = { + .size = 32, + .limit = 16 + }, + .mr_cache[14] = { + .size = 16, + .limit = 8 + }, + .mr_cache[15] = { + .size = 8, + .limit = 4 + }, + }, + [3] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = LOG_MAX_SUPPORTED_QPS, + .num_cmd_caches = 0, + }, +}; + +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) +{ + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); + unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); + u32 fw_initializing; + int err = 0; + + do { + fw_initializing = ioread32be(&dev->iseg->initializing); + if (!(fw_initializing >> 31)) + break; + if (time_after(jiffies, end) || + test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { + err = -EBUSY; + break; + } + if (warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n", + jiffies_to_msecs(end - warn) / 1000, fw_initializing); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } + msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT)); + } while (true); + + return err; +} + +static void mlx5_set_driver_version(struct mlx5_core_dev *dev) +{ + int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in, + driver_version); + u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {}; + int remaining_size = driver_ver_sz; + char *string; + + if (!MLX5_CAP_GEN(dev, driver_version)) + return; + + string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version); + + strncpy(string, "Linux", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, ",", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, KBUILD_MODNAME, remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + strncat(string, ",", remaining_size); + + remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); + + snprintf(string + strlen(string), remaining_size, "%u.%u.%u", + LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, + LINUX_VERSION_SUBLEVEL); + + /*Send the command*/ + MLX5_SET(set_driver_version_in, in, opcode, + MLX5_CMD_OP_SET_DRIVER_VERSION); + + mlx5_cmd_exec_in(dev, set_driver_version, in); +} + +static int set_dma_caps(struct pci_dev *pdev) +{ + int err; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n"); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n"); + return err; + } + } + + dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024); + return err; +} + +static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err = 0; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) { + err = pci_enable_device(pdev); + if (!err) + dev->pci_status = MLX5_PCI_STATUS_ENABLED; + } + mutex_unlock(&dev->pci_status_mutex); + + return err; +} + +static void mlx5_pci_disable_device(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + + mutex_lock(&dev->pci_status_mutex); + if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) { + pci_disable_device(pdev); + dev->pci_status = MLX5_PCI_STATUS_DISABLED; + } + mutex_unlock(&dev->pci_status_mutex); +} + +static int request_bar(struct pci_dev *pdev) +{ + int err = 0; + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Missing registers BAR, aborting\n"); + return -ENODEV; + } + + err = pci_request_regions(pdev, KBUILD_MODNAME); + if (err) + dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); + + return err; +} + +static void release_bar(struct pci_dev *pdev) +{ + pci_release_regions(pdev); +} + +struct mlx5_reg_host_endianness { + u8 he; + u8 rsvd[15]; +}; + +static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) +{ + switch (size) { + case 128: + return 0; + case 256: + return 1; + case 512: + return 2; + case 1024: + return 3; + case 2048: + return 4; + case 4096: + return 5; + default: + mlx5_core_warn(dev, "invalid pkey table size %d\n", size); + return 0; + } +} + +void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *dev, struct net_device *netdev) +{ + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + dev->mlx5e_res.uplink_netdev = netdev; + mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, + netdev); + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); +} + +void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) +{ + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, + dev->mlx5e_res.uplink_netdev); + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); +} +EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); + +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data) +{ + mlx5_blocking_notifier_call_chain(dev, event, data); +} +EXPORT_SYMBOL(mlx5_core_mp_event_replay); + +int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) +{ + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; + int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *out, *hca_caps; + u16 opmod = (cap_type << 1) | (cap_mode & 0x01); + int err; + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); + if (err) { + mlx5_core_warn(dev, + "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", + cap_type, cap_mode, err); + goto query_ex; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); + + switch (cap_mode) { + case HCA_CAP_OPMOD_GET_MAX: + memcpy(dev->caps.hca[cap_type]->max, hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + case HCA_CAP_OPMOD_GET_CUR: + memcpy(dev->caps.hca[cap_type]->cur, hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + default: + mlx5_core_warn(dev, + "Tried to query dev cap type(%x) with wrong opmode(%x)\n", + cap_type, cap_mode); + err = -EINVAL; + break; + } +query_ex: + kfree(out); + return err; +} + +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) +{ + int ret; + + ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR); + if (ret) + return ret; + return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX); +} + +static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod) +{ + MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1); + return mlx5_cmd_exec_in(dev, set_hca_cap, in); +} + +static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + int req_endianness; + int err; + + if (!MLX5_CAP_GEN(dev, atomic)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); + if (err) + return err; + + req_endianness = + MLX5_CAP_ATOMIC(dev, + supported_atomic_req_8B_endianness_mode_1); + + if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + + /* Set requestor to host endianness */ + MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode, + MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); + + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC); +} + +static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + bool do_set = false; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || + !MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur, + MLX5_ST_SZ_BYTES(odp_cap)); + +#define ODP_CAP_SET_MAX(dev, field) \ + do { \ + u32 _res = MLX5_CAP_ODP_MAX(dev, field); \ + if (_res) { \ + do_set = true; \ + MLX5_SET(odp_cap, set_hca_cap, field, _res); \ + } \ + } while (0) + + ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.send); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.write); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.read); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic); + ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, dc_odp_caps.send); + ODP_CAP_SET_MAX(dev, dc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, dc_odp_caps.write); + ODP_CAP_SET_MAX(dev, dc_odp_caps.read); + ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic); + + if (!do_set) + return 0; + + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); +} + +static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return err; +} + +bool mlx5_is_roce_on(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + + if (!err) + return val.vbool; + + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return MLX5_CAP_GEN(dev, roce); +} +EXPORT_SYMBOL(mlx5_is_roce_on); + +static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); + if (err) + return err; + + if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) || + !(dev->priv.sw_vhca_id > 0)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, + MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); + MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); + + return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2); +} + +static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) +{ + struct mlx5_profile *prof = &dev->profile; + void *set_hca_cap; + int max_uc_list; + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur, + MLX5_ST_SZ_BYTES(cmd_hca_cap)); + + mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", + mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), + 128); + /* we limit the size of the pkey table to 128 entries for now */ + MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, + to_fw_pkey_sz(dev, 128)); + + /* Check log_max_qp from HCA caps to set in current profile */ + if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { + prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp)); + } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { + mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", + prof->log_max_qp, + MLX5_CAP_GEN_MAX(dev, log_max_qp)); + prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } + if (prof->mask & MLX5_PROF_MASK_QP_SIZE) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, + prof->log_max_qp); + + /* disable cmdif checksum */ + MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + + /* Enable 4K UAR only when HCA supports it and page size is bigger + * than 4K. + */ + if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096) + MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); + + MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); + + if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + cache_line_128byte, + cache_line_size() >= 128 ? 1 : 0); + + if (MLX5_CAP_GEN_MAX(dev, dct)) + MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); + + if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event)) + MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1); + if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload)) + MLX5_SET(cmd_hca_cap, set_hca_cap, + pci_sync_for_fw_update_with_driver_unload, 1); + + if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + num_vhca_ports, + MLX5_CAP_GEN_MAX(dev, num_vhca_ports)); + + if (MLX5_CAP_GEN_MAX(dev, release_all_pages)) + MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1); + + if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) + MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); + + mlx5_vhca_state_cap_handle(dev, set_hca_cap); + + if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)) + MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, + MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + + if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce)) + MLX5_SET(cmd_hca_cap, set_hca_cap, roce, + mlx5_is_roce_on(dev)); + + max_uc_list = max_uc_list_get_devlink_param(dev); + if (max_uc_list > 0) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list, + ilog2(max_uc_list)); + + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); +} + +/* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the + * boot process. + * In case RoCE cap is writable in FW and user/devlink requested to change the + * cap, we are yet to query the final state of the above cap. + * Hence, the need for this function. + * + * Returns + * True: + * 1) RoCE cap is read only in FW and already disabled + * OR: + * 2) RoCE cap is writable in FW and user/devlink requested it off. + * + * In any other case, return False. + */ +static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || + (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); +} + +static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (is_roce_fw_disabled(dev)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); + if (err) + return err; + + if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) || + !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur, + MLX5_ST_SZ_BYTES(roce_cap)); + MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1); + + if (MLX5_CAP_ROCE_MAX(dev, qp_ooo_transmit_default)) + MLX5_SET(roce_cap, set_hca_cap, qp_ooo_transmit_default, 1); + + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE); + return err; +} + +static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev, + void *set_ctx) +{ + void *set_hca_cap; + int err; + + if (!MLX5_CAP_GEN(dev, port_selection_cap)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION); + if (err) + return err; + + if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) || + !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass)) + return 0; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, + MLX5_ST_SZ_BYTES(port_selection_cap)); + MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1); + + err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION); + + return err; +} + +static int set_hca_cap(struct mlx5_core_dev *dev) +{ + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_ctx; + int err; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + err = handle_hca_cap(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_atomic(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_atomic failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_odp(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_odp failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_roce(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_roce failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_2(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_2 failed\n"); + goto out; + } + + memset(set_ctx, 0, set_sz); + err = handle_hca_cap_port_selection(dev, set_ctx); + if (err) { + mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n"); + goto out; + } + +out: + kfree(set_ctx); + return err; +} + +static int set_hca_ctrl(struct mlx5_core_dev *dev) +{ + struct mlx5_reg_host_endianness he_in; + struct mlx5_reg_host_endianness he_out; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + memset(&he_in, 0, sizeof(he_in)); + he_in.he = MLX5_SET_HOST_ENDIANNESS; + err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), + &he_out, sizeof(he_out), + MLX5_REG_HOST_ENDIANNESS, 0, 1); + return err; +} + +static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) +{ + int ret = 0; + + /* Disable local_lb by default */ + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + ret = mlx5_nic_vport_update_local_lb(dev, false); + + return ret; +} + +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); + return mlx5_cmd_exec_in(dev, enable_hca, in); +} + +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); + return mlx5_cmd_exec_in(dev, disable_hca, in); +} + +static int mlx5_core_set_issi(struct mlx5_core_dev *dev) +{ + u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {}; + u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {}; + u32 sup_issi; + int err; + + MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); + err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out); + if (err) { + u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome); + u8 status = MLX5_GET(query_issi_out, query_out, status); + + if (!status || syndrome == MLX5_DRIVER_SYND) { + mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", + err, status, syndrome); + return err; + } + + mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n"); + dev->issi = 0; + return 0; + } + + sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); + + if (sup_issi & (1 << 1)) { + u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {}; + + MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); + MLX5_SET(set_issi_in, set_in, current_issi, 1); + err = mlx5_cmd_exec_in(dev, set_issi, set_in); + if (err) { + mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n", + err); + return err; + } + + dev->issi = 1; + + return 0; + } else if (sup_issi & (1 << 0) || !sup_issi) { + return 0; + } + + return -EOPNOTSUPP; +} + +static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int err = 0; + + mutex_init(&dev->pci_status_mutex); + pci_set_drvdata(dev->pdev, dev); + + dev->bar_addr = pci_resource_start(pdev, 0); + + err = mlx5_pci_enable_device(dev); + if (err) { + mlx5_core_err(dev, "Cannot enable PCI device, aborting\n"); + return err; + } + + err = request_bar(pdev); + if (err) { + mlx5_core_err(dev, "error requesting BARs, aborting\n"); + goto err_disable; + } + + pci_set_master(pdev); + + err = set_dma_caps(pdev); + if (err) { + mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n"); + goto err_clr_master; + } + + if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); + + dev->iseg_base = dev->bar_addr; + dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); + if (!dev->iseg) { + err = -ENOMEM; + mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n"); + goto err_clr_master; + } + + mlx5_pci_vsc_init(dev); + return 0; + +err_clr_master: + release_bar(dev->pdev); +err_disable: + mlx5_pci_disable_device(dev); + return err; +} + +static void mlx5_pci_close(struct mlx5_core_dev *dev) +{ + /* health work might still be active, and it needs pci bar in + * order to know the NIC state. Therefore, drain the health WQ + * before removing the pci bars + */ + mlx5_drain_health_wq(dev); + iounmap(dev->iseg); + release_bar(dev->pdev); + mlx5_pci_disable_device(dev); +} + +static int mlx5_init_once(struct mlx5_core_dev *dev) +{ + int err; + + dev->priv.devc = mlx5_devcom_register_device(dev); + if (IS_ERR(dev->priv.devc)) + mlx5_core_warn(dev, "failed to register devcom device %ld\n", + PTR_ERR(dev->priv.devc)); + + err = mlx5_query_board_id(dev); + if (err) { + mlx5_core_err(dev, "query board id failed\n"); + goto err_devcom; + } + + err = mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize irq table\n"); + goto err_devcom; + } + + err = mlx5_eq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize eq\n"); + goto err_irq_cleanup; + } + + err = mlx5_events_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize events\n"); + goto err_eq_cleanup; + } + + err = mlx5_fw_reset_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize fw reset events\n"); + goto err_events_cleanup; + } + + mlx5_cq_debugfs_init(dev); + + mlx5_init_reserved_gids(dev); + + mlx5_init_clock(dev); + + dev->vxlan = mlx5_vxlan_create(dev); + dev->geneve = mlx5_geneve_create(dev); + + err = mlx5_init_rl_table(dev); + if (err) { + mlx5_core_err(dev, "Failed to init rate limiting\n"); + goto err_tables_cleanup; + } + + err = mlx5_mpfs_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init l2 table %d\n", err); + goto err_rl_cleanup; + } + + err = mlx5_sriov_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init sriov %d\n", err); + goto err_mpfs_cleanup; + } + + err = mlx5_eswitch_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; + } + + err = mlx5_fpga_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init fpga device %d\n", err); + goto err_eswitch_cleanup; + } + + err = mlx5_vhca_event_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err); + goto err_fpga_cleanup; + } + + err = mlx5_sf_hw_table_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init SF HW table %d\n", err); + goto err_sf_hw_table_cleanup; + } + + err = mlx5_sf_table_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init SF table %d\n", err); + goto err_sf_table_cleanup; + } + + err = mlx5_fs_core_alloc(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc flow steering\n"); + goto err_fs; + } + + dev->dm = mlx5_dm_create(dev); + if (IS_ERR(dev->dm)) + mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm)); + + dev->tracer = mlx5_fw_tracer_create(dev); + dev->hv_vhca = mlx5_hv_vhca_create(dev); + dev->rsc_dump = mlx5_rsc_dump_create(dev); + + return 0; + +err_fs: + mlx5_sf_table_cleanup(dev); +err_sf_table_cleanup: + mlx5_sf_hw_table_cleanup(dev); +err_sf_hw_table_cleanup: + mlx5_vhca_event_cleanup(dev); +err_fpga_cleanup: + mlx5_fpga_cleanup(dev); +err_eswitch_cleanup: + mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); +err_mpfs_cleanup: + mlx5_mpfs_cleanup(dev); +err_rl_cleanup: + mlx5_cleanup_rl_table(dev); +err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); + mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_clock(dev); + mlx5_cleanup_reserved_gids(dev); + mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); +err_events_cleanup: + mlx5_events_cleanup(dev); +err_eq_cleanup: + mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); +err_devcom: + mlx5_devcom_unregister_device(dev->priv.devc); + + return err; +} + +static void mlx5_cleanup_once(struct mlx5_core_dev *dev) +{ + mlx5_rsc_dump_destroy(dev); + mlx5_hv_vhca_destroy(dev->hv_vhca); + mlx5_fw_tracer_destroy(dev->tracer); + mlx5_dm_cleanup(dev); + mlx5_fs_core_free(dev); + mlx5_sf_table_cleanup(dev); + mlx5_sf_hw_table_cleanup(dev); + mlx5_vhca_event_cleanup(dev); + mlx5_fpga_cleanup(dev); + mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); + mlx5_mpfs_cleanup(dev); + mlx5_cleanup_rl_table(dev); + mlx5_geneve_destroy(dev->geneve); + mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_clock(dev); + mlx5_cleanup_reserved_gids(dev); + mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); + mlx5_events_cleanup(dev); + mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); + mlx5_devcom_unregister_device(dev->priv.devc); +} + +static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout) +{ + int err; + + mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + /* Only PFs hold the relevant PCIe information for this query */ + if (mlx5_core_is_pf(dev)) + pcie_print_link_status(dev->pdev); + + /* wait for firmware to accept initialization segments configurations + */ + err = wait_fw_init(dev, timeout, + mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL)); + if (err) { + mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", + timeout); + return err; + } + + err = mlx5_cmd_enable(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); + return err; + } + + mlx5_tout_query_iseg(dev); + + err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0); + if (err) { + mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n", + mlx5_tout_ms(dev, FW_INIT)); + goto err_cmd_cleanup; + } + + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); + + mlx5_start_health_poll(dev); + + err = mlx5_core_enable_hca(dev, 0); + if (err) { + mlx5_core_err(dev, "enable hca failed\n"); + goto stop_health_poll; + } + + err = mlx5_core_set_issi(dev); + if (err) { + mlx5_core_err(dev, "failed to set issi\n"); + goto err_disable_hca; + } + + err = mlx5_satisfy_startup_pages(dev, 1); + if (err) { + mlx5_core_err(dev, "failed to allocate boot pages\n"); + goto err_disable_hca; + } + + err = mlx5_tout_query_dtor(dev); + if (err) { + mlx5_core_err(dev, "failed to read dtor\n"); + goto reclaim_boot_pages; + } + + return 0; + +reclaim_boot_pages: + mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev, 0); +stop_health_poll: + mlx5_stop_health_poll(dev, boot); +err_cmd_cleanup: + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_disable(dev); + + return err; +} + +static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot) +{ + mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev, 0); + mlx5_stop_health_poll(dev, boot); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_disable(dev); +} + +static int mlx5_function_open(struct mlx5_core_dev *dev) +{ + int err; + + err = set_hca_ctrl(dev); + if (err) { + mlx5_core_err(dev, "set_hca_ctrl failed\n"); + return err; + } + + err = set_hca_cap(dev); + if (err) { + mlx5_core_err(dev, "set_hca_cap failed\n"); + return err; + } + + err = mlx5_satisfy_startup_pages(dev, 0); + if (err) { + mlx5_core_err(dev, "failed to allocate init pages\n"); + return err; + } + + err = mlx5_cmd_init_hca(dev, sw_owner_id); + if (err) { + mlx5_core_err(dev, "init hca failed\n"); + return err; + } + + mlx5_set_driver_version(dev); + + err = mlx5_query_hca_caps(dev); + if (err) { + mlx5_core_err(dev, "query hca failed\n"); + return err; + } + mlx5_start_health_fw_log_up(dev); + return 0; +} + +static int mlx5_function_close(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_cmd_teardown_hca(dev); + if (err) { + mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); + return err; + } + + return 0; +} + +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +{ + int err; + + err = mlx5_function_enable(dev, boot, timeout); + if (err) + return err; + + err = mlx5_function_open(dev); + if (err) + mlx5_function_disable(dev, boot); + return err; +} + +static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +{ + int err = mlx5_function_close(dev); + + if (!err) + mlx5_function_disable(dev, boot); + return err; +} + +static int mlx5_load(struct mlx5_core_dev *dev) +{ + int err; + + dev->priv.uar = mlx5_get_uars_page(dev); + if (IS_ERR(dev->priv.uar)) { + mlx5_core_err(dev, "Failed allocating uar, aborting\n"); + err = PTR_ERR(dev->priv.uar); + return err; + } + + mlx5_events_start(dev); + mlx5_pagealloc_start(dev); + + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + + err = mlx5_eq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to create EQs\n"); + goto err_eq_table; + } + + err = mlx5_fw_tracer_init(dev->tracer); + if (err) { + mlx5_core_err(dev, "Failed to init FW tracer %d\n", err); + mlx5_fw_tracer_destroy(dev->tracer); + dev->tracer = NULL; + } + + mlx5_fw_reset_events_start(dev); + mlx5_hv_vhca_init(dev->hv_vhca); + + err = mlx5_rsc_dump_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init Resource dump %d\n", err); + mlx5_rsc_dump_destroy(dev); + dev->rsc_dump = NULL; + } + + err = mlx5_fpga_device_start(dev); + if (err) { + mlx5_core_err(dev, "fpga device start failed %d\n", err); + goto err_fpga_start; + } + + err = mlx5_fs_core_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init flow steering\n"); + goto err_fs; + } + + err = mlx5_core_set_hca_defaults(dev); + if (err) { + mlx5_core_err(dev, "Failed to set hca defaults\n"); + goto err_set_hca; + } + + mlx5_vhca_event_start(dev); + + err = mlx5_sf_hw_table_create(dev); + if (err) { + mlx5_core_err(dev, "sf table create failed %d\n", err); + goto err_vhca; + } + + err = mlx5_ec_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init embedded CPU\n"); + goto err_ec; + } + + mlx5_lag_add_mdev(dev); + err = mlx5_sriov_attach(dev); + if (err) { + mlx5_core_err(dev, "sriov init failed %d\n", err); + goto err_sriov; + } + + mlx5_sf_dev_table_create(dev); + + err = mlx5_devlink_traps_register(priv_to_devlink(dev)); + if (err) + goto err_traps_reg; + + return 0; + +err_traps_reg: + mlx5_sf_dev_table_destroy(dev); + mlx5_sriov_detach(dev); +err_sriov: + mlx5_lag_remove_mdev(dev); + mlx5_ec_cleanup(dev); +err_ec: + mlx5_sf_hw_table_destroy(dev); +err_vhca: + mlx5_vhca_event_stop(dev); +err_set_hca: + mlx5_fs_core_cleanup(dev); +err_fs: + mlx5_fpga_device_stop(dev); +err_fpga_start: + mlx5_rsc_dump_cleanup(dev); + mlx5_hv_vhca_cleanup(dev->hv_vhca); + mlx5_fw_reset_events_stop(dev); + mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_eq_table_destroy(dev); +err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); + mlx5_put_uars_page(dev, dev->priv.uar); + return err; +} + +static void mlx5_unload(struct mlx5_core_dev *dev) +{ + mlx5_devlink_traps_unregister(priv_to_devlink(dev)); + mlx5_sf_dev_table_destroy(dev); + mlx5_eswitch_disable(dev->priv.eswitch); + mlx5_sriov_detach(dev); + mlx5_lag_remove_mdev(dev); + mlx5_ec_cleanup(dev); + mlx5_sf_hw_table_destroy(dev); + mlx5_vhca_event_stop(dev); + mlx5_fs_core_cleanup(dev); + mlx5_fpga_device_stop(dev); + mlx5_rsc_dump_cleanup(dev); + mlx5_hv_vhca_cleanup(dev->hv_vhca); + mlx5_fw_reset_events_stop(dev); + mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); + mlx5_put_uars_page(dev, dev->priv.uar); +} + +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev) +{ + bool light_probe = mlx5_dev_is_lightweight(dev); + int err = 0; + + mutex_lock(&dev->intf_state_mutex); + dev->state = MLX5_DEVICE_STATE_UP; + + err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + if (err) + goto err_function; + + err = mlx5_init_once(dev); + if (err) { + mlx5_core_err(dev, "sw objs init failed\n"); + goto function_teardown; + } + + /* In case of light_probe, mlx5_devlink is already registered. + * Hence, don't register devlink again. + */ + if (!light_probe) { + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + if (err) + goto err_devlink_params_reg; + } + + err = mlx5_load(dev); + if (err) + goto err_load; + + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + + err = mlx5_register_device(dev); + if (err) + goto err_register; + + mutex_unlock(&dev->intf_state_mutex); + return 0; + +err_register: + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); +err_load: + if (!light_probe) + mlx5_devlink_params_unregister(priv_to_devlink(dev)); +err_devlink_params_reg: + mlx5_cleanup_once(dev); +function_teardown: + mlx5_function_teardown(dev, true); +err_function: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mutex_unlock(&dev->intf_state_mutex); + return err; +} + +int mlx5_init_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devl_lock(devlink); + err = mlx5_init_one_devl_locked(dev); + devl_unlock(devlink); + return err; +} + +void mlx5_uninit_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mutex_lock(&dev->intf_state_mutex); + + mlx5_unregister_device(dev); + + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "%s: interface is down, NOP\n", + __func__); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); + mlx5_cleanup_once(dev); + goto out; + } + + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); + mlx5_cleanup_once(dev); + mlx5_function_teardown(dev, true); +out: + mutex_unlock(&dev->intf_state_mutex); + devl_unlock(devlink); +} + +int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery) +{ + int err = 0; + u64 timeout; + + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "interface is up, NOP\n"); + goto out; + } + /* remove any previous indication of internal error */ + dev->state = MLX5_DEVICE_STATE_UP; + + if (recovery) + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT); + else + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT); + err = mlx5_function_setup(dev, false, timeout); + if (err) + goto err_function; + + err = mlx5_load(dev); + if (err) + goto err_load; + + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + + err = mlx5_attach_device(dev); + if (err) + goto err_attach; + + mutex_unlock(&dev->intf_state_mutex); + return 0; + +err_attach: + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); +err_load: + mlx5_function_teardown(dev, false); +err_function: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +out: + mutex_unlock(&dev->intf_state_mutex); + return err; +} + +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) +{ + struct devlink *devlink = priv_to_devlink(dev); + int ret; + + devl_lock(devlink); + ret = mlx5_load_one_devl_locked(dev, recovery); + devl_unlock(devlink); + return ret; +} + +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend) +{ + devl_assert_locked(priv_to_devlink(dev)); + mutex_lock(&dev->intf_state_mutex); + + mlx5_detach_device(dev, suspend); + + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "%s: interface is down, NOP\n", + __func__); + goto out; + } + + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); + mlx5_function_teardown(dev, false); +out: + mutex_unlock(&dev->intf_state_mutex); +} + +void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_unload_one_devl_locked(dev, suspend); + devl_unlock(devlink); +} + +/* In case of light probe, we don't need a full query of hca_caps, but only the bellow caps. + * A full query of hca_caps will be done when the device will reload. + */ +static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_VDPA_EMULATION, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + return 0; +} + +int mlx5_init_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + dev->state = MLX5_DEVICE_STATE_UP; + err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + if (err) { + mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err); + goto out; + } + + err = mlx5_query_hca_caps_light(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err); + goto query_hca_caps_err; + } + + devl_lock(devlink); + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + devl_unlock(devlink); + if (err) { + mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); + goto query_hca_caps_err; + } + + return 0; + +query_hca_caps_err: + mlx5_function_disable(dev, true); +out: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + return err; +} + +void mlx5_uninit_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); + devl_unlock(devlink); + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, true); +} + +/* xxx_light() function are used in order to configure the device without full + * init (light init). e.g.: There isn't a point in reload a device to light state. + * Hence, mlx5_load_one_light() isn't needed. + */ + +void mlx5_unload_one_light(struct mlx5_core_dev *dev) +{ + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, false); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + +static const int types[] = { + MLX5_CAP_GENERAL, + MLX5_CAP_GENERAL_2, + MLX5_CAP_ETHERNET_OFFLOADS, + MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, + MLX5_CAP_ODP, + MLX5_CAP_ATOMIC, + MLX5_CAP_ROCE, + MLX5_CAP_IPOIB_OFFLOADS, + MLX5_CAP_FLOW_TABLE, + MLX5_CAP_ESWITCH_FLOW_TABLE, + MLX5_CAP_ESWITCH, + MLX5_CAP_QOS, + MLX5_CAP_DEBUG, + MLX5_CAP_DEV_MEM, + MLX5_CAP_DEV_EVENT, + MLX5_CAP_TLS, + MLX5_CAP_VDPA_EMULATION, + MLX5_CAP_IPSEC, + MLX5_CAP_PORT_SELECTION, + MLX5_CAP_MACSEC, + MLX5_CAP_ADV_VIRTUALIZATION, + MLX5_CAP_CRYPTO, +}; + +static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) +{ + int type; + int i; + + for (i = 0; i < ARRAY_SIZE(types); i++) { + type = types[i]; + kfree(dev->caps.hca[type]); + } +} + +static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_hca_cap *cap; + int type; + int i; + + for (i = 0; i < ARRAY_SIZE(types); i++) { + cap = kzalloc(sizeof(*cap), GFP_KERNEL); + if (!cap) + goto err; + type = types[i]; + dev->caps.hca[type] = cap; + } + + return 0; + +err: + mlx5_hca_caps_free(dev); + return -ENOMEM; +} + +static int vhca_id_show(struct seq_file *file, void *priv) +{ + struct mlx5_core_dev *dev = file->private; + + seq_printf(file, "0x%x\n", MLX5_CAP_GEN(dev, vhca_id)); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(vhca_id); + +int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) +{ + struct mlx5_priv *priv = &dev->priv; + int err; + + memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); + lockdep_register_key(&dev->lock_key); + mutex_init(&dev->intf_state_mutex); + lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); + mutex_init(&dev->mlx5e_res.uplink_netdev_lock); + + mutex_init(&priv->bfregs.reg_head.lock); + mutex_init(&priv->bfregs.wc_head.lock); + INIT_LIST_HEAD(&priv->bfregs.reg_head.list); + INIT_LIST_HEAD(&priv->bfregs.wc_head.list); + + mutex_init(&priv->alloc_mutex); + mutex_init(&priv->pgdir_mutex); + INIT_LIST_HEAD(&priv->pgdir_list); + + priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); + priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device), + mlx5_debugfs_root); + debugfs_create_file("vhca_id", 0400, priv->dbg.dbg_root, dev, &vhca_id_fops); + INIT_LIST_HEAD(&priv->traps); + + err = mlx5_cmd_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing cmdif SW structs, aborting\n"); + goto err_cmd_init; + } + + err = mlx5_tout_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); + goto err_timeout_init; + } + + err = mlx5_health_init(dev); + if (err) + goto err_health_init; + + err = mlx5_pagealloc_init(dev); + if (err) + goto err_pagealloc_init; + + err = mlx5_adev_init(dev); + if (err) + goto err_adev_init; + + err = mlx5_hca_caps_alloc(dev); + if (err) + goto err_hca_caps; + + /* The conjunction of sw_vhca_id with sw_owner_id will be a global + * unique id per function which uses mlx5_core. + * Those values are supplied to FW as part of the init HCA command to + * be used by both driver and FW when it's applicable. + */ + dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1, + MAX_SW_VHCA_ID, + GFP_KERNEL); + if (dev->priv.sw_vhca_id < 0) + mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n", + dev->priv.sw_vhca_id); + + return 0; + +err_hca_caps: + mlx5_adev_cleanup(dev); +err_adev_init: + mlx5_pagealloc_cleanup(dev); +err_pagealloc_init: + mlx5_health_cleanup(dev); +err_health_init: + mlx5_tout_cleanup(dev); +err_timeout_init: + mlx5_cmd_cleanup(dev); +err_cmd_init: + debugfs_remove(dev->priv.dbg.dbg_root); + mutex_destroy(&priv->pgdir_mutex); + mutex_destroy(&priv->alloc_mutex); + mutex_destroy(&priv->bfregs.wc_head.lock); + mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); + return err; +} + +void mlx5_mdev_uninit(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + if (priv->sw_vhca_id > 0) + ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id); + + mlx5_hca_caps_free(dev); + mlx5_adev_cleanup(dev); + mlx5_pagealloc_cleanup(dev); + mlx5_health_cleanup(dev); + mlx5_tout_cleanup(dev); + mlx5_cmd_cleanup(dev); + debugfs_remove_recursive(dev->priv.dbg.dbg_root); + mutex_destroy(&priv->pgdir_mutex); + mutex_destroy(&priv->alloc_mutex); + mutex_destroy(&priv->bfregs.wc_head.lock); + mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock); + mutex_destroy(&dev->intf_state_mutex); + lockdep_unregister_key(&dev->lock_key); +} + +static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct mlx5_core_dev *dev; + struct devlink *devlink; + int err; + + devlink = mlx5_devlink_alloc(&pdev->dev); + if (!devlink) { + dev_err(&pdev->dev, "devlink alloc failed\n"); + return -ENOMEM; + } + + dev = devlink_priv(devlink); + dev->device = &pdev->dev; + dev->pdev = pdev; + + dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ? + MLX5_COREDEV_VF : MLX5_COREDEV_PF; + + dev->priv.adev_idx = mlx5_adev_idx_alloc(); + if (dev->priv.adev_idx < 0) { + err = dev->priv.adev_idx; + goto adev_init_err; + } + + err = mlx5_mdev_init(dev, prof_sel); + if (err) + goto mdev_init_err; + + err = mlx5_pci_init(dev, pdev, id); + if (err) { + mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n", + err); + goto pci_init_err; + } + + err = mlx5_init_one(dev); + if (err) { + mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n", + err); + goto err_init_one; + } + + err = mlx5_crdump_enable(dev); + if (err) + dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + + err = mlx5_hwmon_dev_register(dev); + if (err) + mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err); + + pci_save_state(pdev); + devlink_register(devlink); + return 0; + +err_init_one: + mlx5_pci_close(dev); +pci_init_err: + mlx5_mdev_uninit(dev); +mdev_init_err: + mlx5_adev_idx_free(dev->priv.adev_idx); +adev_init_err: + mlx5_devlink_free(devlink); + + return err; +} + +static void remove_one(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); + + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using + * devlink notify APIs. + * Hence, we must drain them before unregistering the devlink. + */ + mlx5_drain_fw_reset(dev); + mlx5_drain_health_wq(dev); + devlink_unregister(devlink); + mlx5_sriov_disable(pdev, false); + mlx5_hwmon_dev_unregister(dev); + mlx5_crdump_disable(dev); + mlx5_uninit_one(dev); + mlx5_pci_close(dev); + mlx5_mdev_uninit(dev); + mlx5_adev_idx_free(dev->priv.adev_idx); + mlx5_devlink_free(devlink); +} + +#define mlx5_pci_trace(dev, fmt, ...) ({ \ + struct mlx5_core_dev *__dev = (dev); \ + mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \ + __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \ + __dev->pci_status, ##__VA_ARGS__); \ +}) + +static const char *result2str(enum pci_ers_result result) +{ + return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" : + result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" : + result == PCI_ERS_RESULT_RECOVERED ? "recovered" : + "unknown"; +} + +static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + enum pci_ers_result res; + + mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state); + + mlx5_enter_error_state(dev, false); + mlx5_error_sw_reset(dev); + mlx5_unload_one(dev, false); + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); + + res = state == pci_channel_io_perm_failure ? + PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", + __func__, dev->state, dev->pci_status, res, result2str(res)); + return res; +} + +/* wait for the device to show vital signs by waiting + * for the health counter to start counting. + */ +static int wait_vital(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_health *health = &dev->priv.health; + const int niter = 100; + u32 last_count = 0; + u32 count; + int i; + + for (i = 0; i < niter; i++) { + count = ioread32be(health->health_counter); + if (count && count != 0xffffffff) { + if (last_count && last_count != count) { + mlx5_core_info(dev, + "wait vital counter value 0x%x after %d iterations\n", + count, i); + return 0; + } + last_count = count; + } + msleep(50); + } + + return -ETIMEDOUT; +} + +static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) +{ + enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT; + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n", + __func__, dev->state, dev->pci_status); + + err = mlx5_pci_enable_device(dev); + if (err) { + mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n", + __func__, err); + goto out; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + err = wait_vital(pdev); + if (err) { + mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n", + __func__, err); + goto out; + } + + res = PCI_ERS_RESULT_RECOVERED; +out: + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n", + __func__, dev->state, dev->pci_status, err, res, result2str(res)); + return res; +} + +static void mlx5_pci_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + mlx5_pci_trace(dev, "Enter, loading driver..\n"); + + err = mlx5_load_one(dev, false); + + if (!err) + devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + + mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err, + !err ? "recovered" : "Failed"); +} + +static const struct pci_error_handlers mlx5_err_handler = { + .error_detected = mlx5_pci_err_detected, + .slot_reset = mlx5_pci_slot_reset, + .resume = mlx5_pci_resume +}; + +static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) +{ + bool fast_teardown = false, force_teardown = false; + int ret = 1; + + fast_teardown = MLX5_CAP_GEN(dev, fast_teardown); + force_teardown = MLX5_CAP_GEN(dev, force_teardown); + + mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown); + mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown); + + if (!fast_teardown && !force_teardown) + return -EOPNOTSUPP; + + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_dbg(dev, "Device in internal error state, giving up\n"); + return -EAGAIN; + } + + /* Panic tear down fw command will stop the PCI bus communication + * with the HCA, so the health poll is no longer needed. + */ + mlx5_drain_health_wq(dev); + mlx5_stop_health_poll(dev, false); + + ret = mlx5_cmd_fast_teardown_hca(dev); + if (!ret) + goto succeed; + + ret = mlx5_cmd_force_teardown_hca(dev); + if (!ret) + goto succeed; + + mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); + mlx5_start_health_poll(dev); + return ret; + +succeed: + mlx5_enter_error_state(dev, true); + + /* Some platforms requiring freeing the IRQ's in the shutdown + * flow. If they aren't freed they can't be allocated after + * kexec. There is no need to cleanup the mlx5_core software + * contexts. + */ + mlx5_core_eq_free_irqs(dev); + + return 0; +} + +static void shutdown(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + mlx5_core_info(dev, "Shutdown was called\n"); + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + err = mlx5_try_fast_unload(dev); + if (err) + mlx5_unload_one(dev, false); + mlx5_pci_disable_device(dev); +} + +static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + mlx5_unload_one(dev, true); + + return 0; +} + +static int mlx5_resume(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + return mlx5_load_one(dev, false); +} + +static const struct pci_device_id mlx5_core_pci_table[] = { + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) }, + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) }, + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) }, + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ + { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ + { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */ + { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ + { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ + { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ + { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ + { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ + { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ + { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ + { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ + { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ + { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); + +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_error_sw_reset(dev); + mlx5_unload_one_devl_locked(dev, false); +} + +int mlx5_recover_device(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_sf(dev)) { + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) + return -EIO; + } + + return mlx5_load_one_devl_locked(dev, true); +} + +static struct pci_driver mlx5_core_driver = { + .name = KBUILD_MODNAME, + .id_table = mlx5_core_pci_table, + .probe = probe_one, + .remove = remove_one, + .suspend = mlx5_suspend, + .resume = mlx5_resume, + .shutdown = shutdown, + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, + .sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix, + .sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count, +}; + +/** + * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if + * mlx5_core is its driver. + * @pdev: The associated PCI device. + * + * Upon return the interface state lock stay held to let caller uses it safely. + * Caller must ensure to use the returned mlx5 device for a narrow window + * and put it back with mlx5_vf_put_core_dev() immediately once usage was over. + * + * Return: Pointer to the associated mlx5_core_dev or NULL. + */ +struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev) +{ + struct mlx5_core_dev *mdev; + + mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver); + if (IS_ERR(mdev)) + return NULL; + + mutex_lock(&mdev->intf_state_mutex); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) { + mutex_unlock(&mdev->intf_state_mutex); + return NULL; + } + + return mdev; +} +EXPORT_SYMBOL(mlx5_vf_get_core_dev); + +/** + * mlx5_vf_put_core_dev - Put the mlx5 core device back. + * @mdev: The mlx5 core device. + * + * Upon return the interface state lock is unlocked and caller should not + * access the mdev any more. + */ +void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev) +{ + mutex_unlock(&mdev->intf_state_mutex); +} +EXPORT_SYMBOL(mlx5_vf_put_core_dev); + +static void mlx5_core_verify_params(void) +{ + if (prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n", + prof_sel, + ARRAY_SIZE(profile) - 1, + MLX5_DEFAULT_PROF); + prof_sel = MLX5_DEFAULT_PROF; + } +} + +static int __init mlx5_init(void) +{ + int err; + + WARN_ONCE(strcmp(MLX5_ADEV_NAME, KBUILD_MODNAME), + "mlx5_core name not in sync with kernel module name"); + + get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); + + mlx5_core_verify_params(); + mlx5_register_debugfs(); + + err = mlx5e_init(); + if (err) + goto err_debug; + + err = mlx5_sf_driver_register(); + if (err) + goto err_sf; + + err = pci_register_driver(&mlx5_core_driver); + if (err) + goto err_pci; + + return 0; + +err_pci: + mlx5_sf_driver_unregister(); +err_sf: + mlx5e_cleanup(); +err_debug: + mlx5_unregister_debugfs(); + return err; +} + +static void __exit mlx5_cleanup(void) +{ + pci_unregister_driver(&mlx5_core_driver); + mlx5_sf_driver_unregister(); + mlx5e_cleanup(); + mlx5_unregister_debugfs(); +} + +module_init(mlx5_init); +module_exit(mlx5_cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c new file mode 100644 index 0000000000..495cca58dc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" + +int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {}; + void *gid; + + MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); + MLX5_SET(attach_to_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec_in(dev, attach_to_mcg, in); +} +EXPORT_SYMBOL(mlx5_core_attach_mcg); + +int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {}; + void *gid; + + MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec_in(dev, detach_from_mcg, in); +} +EXPORT_SYMBOL(mlx5_core_detach_mcg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h new file mode 100644 index 0000000000..124352459c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_CORE_H__ +#define __MLX5_CORE_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern uint mlx5_core_debug_mask; + +#define mlx5_core_dbg(__dev, format, ...) \ + dev_dbg((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_dbg_once(__dev, format, ...) \ + dev_dbg_once((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_dbg_mask(__dev, mask, format, ...) \ +do { \ + if ((mask) & mlx5_core_debug_mask) \ + mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \ +} while (0) + +#define mlx5_core_err(__dev, format, ...) \ + dev_err((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_err_rl(__dev, format, ...) \ + dev_err_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_warn_once(__dev, format, ...) \ + dev_warn_once((__dev)->device, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_warn_rl(__dev, format, ...) \ + dev_warn_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +#define mlx5_core_info(__dev, format, ...) \ + dev_info((__dev)->device, format, ##__VA_ARGS__) + +#define mlx5_core_info_rl(__dev, format, ...) \ + dev_info_ratelimited((__dev)->device, \ + "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, const char *format, ...) +{ + struct device *device = dev->device; + struct va_format vaf; + va_list args; + + if (WARN_ONCE(level < LOGLEVEL_EMERG || level > LOGLEVEL_DEBUG, + "Level %d is out of range, set to default level\n", level)) + level = LOGLEVEL_DEFAULT; + + va_start(args, format); + vaf.fmt = format; + vaf.va = &args; + + dev_printk_emit(level, device, "%s %s: %pV", dev_driver_string(device), dev_name(device), + &vaf); + va_end(args); +} + +#define mlx5_log(__dev, level, format, ...) \ + mlx5_printk(__dev, level, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + +static inline struct device *mlx5_core_dma_dev(struct mlx5_core_dev *dev) +{ + return &dev->pdev->dev; +} + +enum { + MLX5_CMD_DATA, /* print command payload only */ + MLX5_CMD_TIME, /* print command execution time */ +}; + +enum { + MLX5_DRIVER_STATUS_ABORTED = 0xfe, + MLX5_DRIVER_SYND = 0xbadd00de, +}; + +enum mlx5_semaphore_space_address { + MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA, + MLX5_SEMAPHORE_SW_RESET = 0x20, +}; + +#define MLX5_DEFAULT_PROF 2 +#define MLX5_SF_PROF 3 + +static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed, + size_t item_size, size_t num_items, + const char *func, int line) +{ + int inlen; + + if (fixed > INT_MAX || item_size > INT_MAX || num_items > INT_MAX) { + mlx5_core_err(dev, "%s: %s:%d: input values too big: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + if (check_mul_overflow((int)item_size, (int)num_items, &inlen)) { + mlx5_core_err(dev, "%s: %s:%d: multiplication overflow: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + if (check_add_overflow((int)fixed, inlen, &inlen)) { + mlx5_core_err(dev, "%s: %s:%d: addition overflow: %zu + %zu * %zu\n", + __func__, func, line, fixed, item_size, num_items); + return -ENOMEM; + } + + return inlen; +} + +#define MLX5_FLEXIBLE_INLEN(dev, fixed, item_size, num_items) \ + mlx5_flexible_inlen(dev, fixed, item_size, num_items, __func__, __LINE__) + +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); +int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode); +int mlx5_query_hca_caps(struct mlx5_core_dev *dev); +int mlx5_query_board_id(struct mlx5_core_dev *dev); +int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num); +int mlx5_cmd_init(struct mlx5_core_dev *dev); +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +int mlx5_cmd_enable(struct mlx5_core_dev *dev); +void mlx5_cmd_disable(struct mlx5_core_dev *dev); +void mlx5_cmd_set_state(struct mlx5_core_dev *dev, + enum mlx5_cmdif_state cmdif_state); +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); +void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); +void mlx5_error_sw_reset(struct mlx5_core_dev *dev); +u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev); +int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev); +void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_recover_device(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); +int mlx5_sriov_attach(struct mlx5_core_dev *dev); +void mlx5_sriov_detach(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change); +int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 *element_id); +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 element_id, + u32 modify_bitmask); +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id); +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); + +void mlx5_cmd_flush(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); + +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group); +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group, + u8 access_reg_group); +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group); + +void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_add_mdev(struct mlx5_core_dev *dev); +void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev); +void mlx5_lag_disable_change(struct mlx5_core_dev *dev); +void mlx5_lag_enable_change(struct mlx5_core_dev *dev); + +int mlx5_events_init(struct mlx5_core_dev *dev); +void mlx5_events_cleanup(struct mlx5_core_dev *dev); +void mlx5_events_start(struct mlx5_core_dev *dev); +void mlx5_events_stop(struct mlx5_core_dev *dev); + +int mlx5_adev_idx_alloc(void); +void mlx5_adev_idx_free(int idx); +void mlx5_adev_cleanup(struct mlx5_core_dev *dev); +int mlx5_adev_init(struct mlx5_core_dev *dev); + +int mlx5_attach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend); +int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_unregister_device(struct mlx5_core_dev *dev); +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev); +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev); +struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); +void mlx5_dev_list_lock(void); +void mlx5_dev_list_unlock(void); +int mlx5_dev_list_trylock(void); + +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev); +int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); + +struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev); +void mlx5_dm_cleanup(struct mlx5_core_dev *dev); + +#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ + MLX5_CAP_GEN((mdev), pps_modify) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ + MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) + +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, + struct netlink_ext_ack *extack); +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *stored_ver); + +#ifdef CONFIG_MLX5_CORE_EN +int mlx5e_init(void); +void mlx5e_cleanup(void); +#else +static inline int mlx5e_init(void){ return 0; } +static inline void mlx5e_cleanup(void){} +#endif + +static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) +{ + return pci_num_vf(dev->pdev) ? true : false; +} + +int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev); +static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev) +{ + int ret; + + mlx5_dev_list_lock(); + ret = mlx5_rescan_drivers_locked(dev); + mlx5_dev_list_unlock(); + return ret; +} + +void mlx5_lag_update(struct mlx5_core_dev *dev); + +enum { + MLX5_NIC_IFC_FULL = 0, + MLX5_NIC_IFC_DISABLED = 1, + MLX5_NIC_IFC_NO_DRAM_NIC = 2, + MLX5_NIC_IFC_SW_RESET = 7 +}; + +u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); +void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); + +static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) +{ + return dev->coredev_type == MLX5_COREDEV_SF; +} + +int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); +void mlx5_mdev_uninit(struct mlx5_core_dev *dev); +int mlx5_init_one(struct mlx5_core_dev *dev); +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev); +void mlx5_uninit_one(struct mlx5_core_dev *dev); +void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); +int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); +int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); +int mlx5_init_one_light(struct mlx5_core_dev *dev); +void mlx5_uninit_one_light(struct mlx5_core_dev *dev); +void mlx5_unload_one_light(struct mlx5_core_dev *dev); + +int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, + u16 opmod); +#define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ + mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) + +void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); +static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); +} + +bool mlx5_eth_supported(struct mlx5_core_dev *dev); +bool mlx5_rdma_supported(struct mlx5_core_dev *dev); +bool mlx5_vnet_supported(struct mlx5_core_dev *dev); +bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); + +static inline u16 mlx5_core_ec_vf_vport_base(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN_2(dev, ec_vf_vport_base); +} + +static inline u16 mlx5_core_ec_sriov_enabled(const struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf(dev) && mlx5_core_ec_vf_vport_base(dev); +} + +static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16 vport_num) +{ + int base_vport = mlx5_core_ec_vf_vport_base(dev); + int max_vport = base_vport + mlx5_core_max_ec_vfs(dev); + + if (!mlx5_core_ec_sriov_enabled(dev)) + return false; + + return (vport_num >= base_vport && vport_num < max_vport); +} + +static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func) +{ + return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + 1 + : vport; +} + +#endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h new file mode 100644 index 0000000000..1088114e90 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_IRQ_H__ +#define __MLX5_IRQ_H__ + +#include + +#define MLX5_COMP_EQS_PER_SF 8 + +struct mlx5_irq; +struct cpu_rmap; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev); +int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table); +int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table); +struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); + +int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn, + int msix_vec_count); +int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); + +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev); +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); +struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap); +struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, + u16 vecidx, struct cpu_rmap **rmap); +void mlx5_irq_release_vector(struct mlx5_irq *irq); +int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); +struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); +int mlx5_irq_get_index(struct mlx5_irq *irq); + +struct mlx5_irq_pool; +#ifdef CONFIG_MLX5_SF +struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx); +struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, + struct irq_affinity_desc *af_desc); +void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq); +#else +static inline +struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline +void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) +{ +} +#endif +#endif /* __MLX5_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c new file mode 100644 index 0000000000..678f0be813 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" + +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, + int inlen) +{ + u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; + u32 mkey_index; + int err; + + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + + err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); + if (err) + return err; + + mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); + *mkey = MLX5_GET(create_mkey_in, in, memory_key_mkey_entry.mkey_7_0) | + mlx5_idx_to_mkey(mkey_index); + + mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, *mkey); + return 0; +} +EXPORT_SYMBOL(mlx5_core_create_mkey); + +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey) +{ + u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {}; + + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey)); + return mlx5_cmd_exec_in(dev, destroy_mkey, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_mkey); + +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {}; + + memset(out, 0, outlen); + MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY); + MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} +EXPORT_SYMBOL(mlx5_core_query_mkey); + +static inline u32 mlx5_get_psv(u32 *out, int psv_index) +{ + switch (psv_index) { + case 1: return MLX5_GET(create_psv_out, out, psv1_index); + case 2: return MLX5_GET(create_psv_out, out, psv2_index); + case 3: return MLX5_GET(create_psv_out, out, psv3_index); + default: return MLX5_GET(create_psv_out, out, psv0_index); + } +} + +int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, + int npsvs, u32 *sig_index) +{ + u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {}; + int i, err; + + if (npsvs > MLX5_MAX_PSVS) + return -EINVAL; + + MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV); + MLX5_SET(create_psv_in, in, pd, pdn); + MLX5_SET(create_psv_in, in, num_psv, npsvs); + + err = mlx5_cmd_exec_inout(dev, create_psv, in, out); + if (err) + return err; + + for (i = 0; i < npsvs; i++) + sig_index[i] = mlx5_get_psv(out, i); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_psv); + +int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) +{ + u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {}; + + MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV); + MLX5_SET(destroy_psv_in, in, psvn, psv_num); + return mlx5_cmd_exec_in(dev, destroy_psv, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_psv); + +__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + u32 mkey; + + if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + MLX5_SET(query_special_contexts_in, in, opcode, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + if (mlx5_cmd_exec_inout(dev, query_special_contexts, in, out)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + mkey = MLX5_GET(query_special_contexts_out, out, + terminate_scatter_list_mkey); + return cpu_to_be32(mkey); +} +EXPORT_SYMBOL(mlx5_core_get_terminate_scatter_list_mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c new file mode 100644 index 0000000000..dcf58efac1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -0,0 +1,805 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/tout.h" + +enum { + MLX5_PAGES_CANT_GIVE = 0, + MLX5_PAGES_GIVE = 1, + MLX5_PAGES_TAKE = 2 +}; + +struct mlx5_pages_req { + struct mlx5_core_dev *dev; + u16 func_id; + u8 ec_function; + s32 npages; + struct work_struct work; + u8 release_all; +}; + +struct fw_page { + struct rb_node rb_node; + u64 addr; + struct page *page; + u32 function; + unsigned long bitmask; + struct list_head list; + unsigned int free_count; +}; + +enum { + MLX5_MAX_RECLAIM_TIME_MILI = 5000, + MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, +}; + +static u32 get_function(u16 func_id, bool ec_function) +{ + return (u32)func_id | (ec_function << 16); +} + +static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function) +{ + if (!func_id) + return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; + + if (func_id <= max(mlx5_core_max_vfs(dev), mlx5_core_max_ec_vfs(dev))) { + if (ec_function) + return MLX5_EC_VF; + else + return MLX5_VF; + } + return MLX5_SF; +} + +static u32 mlx5_get_ec_function(u32 function) +{ + return function >> 16; +} + +static u32 mlx5_get_func_id(u32 function) +{ + return function & 0xffff; +} + +static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) +{ + struct rb_root *root; + int err; + + root = xa_load(&dev->priv.page_root_xa, function); + if (root) + return root; + + root = kzalloc(sizeof(*root), GFP_KERNEL); + if (!root) + return ERR_PTR(-ENOMEM); + + err = xa_insert(&dev->priv.page_root_xa, function, root, GFP_KERNEL); + if (err) { + kfree(root); + return ERR_PTR(err); + } + + *root = RB_ROOT; + + return root; +} + +static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u32 function) +{ + struct rb_node *parent = NULL; + struct rb_root *root; + struct rb_node **new; + struct fw_page *nfp; + struct fw_page *tfp; + int i; + + root = page_root_per_function(dev, function); + if (IS_ERR(root)) + return PTR_ERR(root); + + new = &root->rb_node; + + while (*new) { + parent = *new; + tfp = rb_entry(parent, struct fw_page, rb_node); + if (tfp->addr < addr) + new = &parent->rb_left; + else if (tfp->addr > addr) + new = &parent->rb_right; + else + return -EEXIST; + } + + nfp = kzalloc(sizeof(*nfp), GFP_KERNEL); + if (!nfp) + return -ENOMEM; + + nfp->addr = addr; + nfp->page = page; + nfp->function = function; + nfp->free_count = MLX5_NUM_4K_IN_PAGE; + for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++) + set_bit(i, &nfp->bitmask); + + rb_link_node(&nfp->rb_node, parent, new); + rb_insert_color(&nfp->rb_node, root); + list_add(&nfp->list, &dev->priv.free_list); + + return 0; +} + +static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr, + u32 function) +{ + struct fw_page *result = NULL; + struct rb_root *root; + struct rb_node *tmp; + struct fw_page *tfp; + + root = xa_load(&dev->priv.page_root_xa, function); + if (WARN_ON_ONCE(!root)) + return NULL; + + tmp = root->rb_node; + + while (tmp) { + tfp = rb_entry(tmp, struct fw_page, rb_node); + if (tfp->addr < addr) { + tmp = tmp->rb_left; + } else if (tfp->addr > addr) { + tmp = tmp->rb_right; + } else { + result = tfp; + break; + } + } + + return result; +} + +static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, + s32 *npages, int boot) +{ + u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {}; + int err; + + MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES); + MLX5_SET(query_pages_in, in, op_mod, boot ? + MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : + MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev)); + + err = mlx5_cmd_exec_inout(dev, query_pages, in, out); + if (err) + return err; + + *npages = MLX5_GET(query_pages_out, out, num_pages); + *func_id = MLX5_GET(query_pages_out, out, function_id); + + return err; +} + +static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function) +{ + struct fw_page *fp = NULL; + struct fw_page *iter; + unsigned n; + + list_for_each_entry(iter, &dev->priv.free_list, list) { + if (iter->function != function) + continue; + fp = iter; + } + + if (list_empty(&dev->priv.free_list) || !fp) + return -ENOMEM; + + n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); + if (n >= MLX5_NUM_4K_IN_PAGE) { + mlx5_core_warn(dev, "alloc 4k bug: fw page = 0x%llx, n = %u, bitmask: %lu, max num of 4K pages: %d\n", + fp->addr, n, fp->bitmask, MLX5_NUM_4K_IN_PAGE); + return -ENOENT; + } + clear_bit(n, &fp->bitmask); + fp->free_count--; + if (!fp->free_count) + list_del(&fp->list); + + *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE; + + return 0; +} + +#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT) + +static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, + bool in_free_list) +{ + struct rb_root *root; + + root = xa_load(&dev->priv.page_root_xa, fwp->function); + if (WARN_ON_ONCE(!root)) + return; + + rb_erase(&fwp->rb_node, root); + if (in_free_list) + list_del(&fwp->list); + dma_unmap_page(mlx5_core_dma_dev(dev), fwp->addr & MLX5_U64_4K_PAGE_MASK, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(fwp->page); + kfree(fwp); +} + +static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) +{ + struct fw_page *fwp; + int n; + + fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, function); + if (!fwp) { + mlx5_core_warn_rl(dev, "page not found\n"); + return; + } + n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; + fwp->free_count++; + set_bit(n, &fwp->bitmask); + if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) + free_fwp(dev, fwp, fwp->free_count != 1); + else if (fwp->free_count == 1) + list_add(&fwp->list, &dev->priv.free_list); +} + +static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) +{ + struct device *device = mlx5_core_dma_dev(dev); + int nid = dev_to_node(device); + struct page *page; + u64 zero_addr = 1; + u64 addr; + int err; + + page = alloc_pages_node(nid, GFP_HIGHUSER, 0); + if (!page) { + mlx5_core_warn(dev, "failed to allocate page\n"); + return -ENOMEM; + } +map: + addr = dma_map_page(device, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(device, addr)) { + mlx5_core_warn(dev, "failed dma mapping page\n"); + err = -ENOMEM; + goto err_mapping; + } + + /* Firmware doesn't support page with physical address 0 */ + if (addr == 0) { + zero_addr = addr; + goto map; + } + + err = insert_page(dev, addr, page, function); + if (err) { + mlx5_core_err(dev, "failed to track allocated page\n"); + dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + } + +err_mapping: + if (err) + __free_page(page); + + if (zero_addr == 0) + dma_unmap_page(device, zero_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + return err; +} + +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) +{ + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {}; + int err; + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); + + err = mlx5_cmd_exec_in(dev, manage_pages, in); + if (err) + mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n", + func_id, err); +} + +static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, + int event, bool ec_function) +{ + u32 function = get_function(func_id, ec_function); + u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); + int notify_fail = event; + u16 func_type; + u64 addr; + int err; + u32 *in; + int i; + + inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); + goto out_free; + } + + for (i = 0; i < npages; i++) { +retry: + err = alloc_4k(dev, &addr, function); + if (err) { + if (err == -ENOMEM) + err = alloc_system_page(dev, function); + if (err) { + dev->priv.fw_pages_alloc_failed += (npages - i); + goto out_4k; + } + + goto retry; + } + MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr); + } + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); + + err = mlx5_cmd_do(dev, in, inlen, out, sizeof(out)); + if (err == -EREMOTEIO) { + notify_fail = 0; + /* if triggered by FW and failed by FW ignore */ + if (event) { + err = 0; + goto out_dropped; + } + } + err = mlx5_cmd_check(dev, err, in, out); + if (err) { + mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", + func_id, npages, err); + goto out_dropped; + } + + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] += npages; + dev->priv.fw_pages += npages; + + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", + npages, ec_function, func_id, err); + + kvfree(in); + return 0; + +out_dropped: + dev->priv.give_pages_dropped += npages; +out_4k: + for (i--; i >= 0; i--) + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function); +out_free: + kvfree(in); + if (notify_fail) + page_notify_fail(dev, func_id, ec_function); + return err; +} + +static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) +{ + u32 function = get_function(func_id, ec_function); + struct rb_root *root; + struct rb_node *p; + int npages = 0; + u16 func_type; + + root = xa_load(&dev->priv.page_root_xa, function); + if (WARN_ON_ONCE(!root)) + return; + + p = rb_first(root); + while (p) { + struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node); + + p = rb_next(p); + npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count); + free_fwp(dev, fwp, fwp->free_count); + } + + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] -= npages; + dev->priv.fw_pages -= npages; + + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n", + npages, ec_function, func_id); +} + +static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index, + u32 npages) +{ + u32 pages_set = 0; + unsigned int n; + + for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) { + MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set, + fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE)); + pages_set++; + + if (!--npages) + break; + } + + return pages_set; +} + +static int reclaim_pages_cmd(struct mlx5_core_dev *dev, + u32 *in, int in_size, u32 *out, int out_size) +{ + struct rb_root *root; + struct fw_page *fwp; + struct rb_node *p; + bool ec_function; + u32 func_id; + u32 npages; + u32 i = 0; + + if (!mlx5_cmd_is_down(dev)) + return mlx5_cmd_do(dev, in, in_size, out, out_size); + + /* No hard feelings, we want our pages back! */ + npages = MLX5_GET(manage_pages_in, in, input_num_entries); + func_id = MLX5_GET(manage_pages_in, in, function_id); + ec_function = MLX5_GET(manage_pages_in, in, embedded_cpu_function); + + root = xa_load(&dev->priv.page_root_xa, get_function(func_id, ec_function)); + if (WARN_ON_ONCE(!root)) + return -EEXIST; + + p = rb_first(root); + while (p && i < npages) { + fwp = rb_entry(p, struct fw_page, rb_node); + p = rb_next(p); + + i += fwp_fill_manage_pages_out(fwp, out, i, npages - i); + } + + MLX5_SET(manage_pages_out, out, output_num_entries, i); + return 0; +} + +static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, + int *nclaimed, bool event, bool ec_function) +{ + u32 function = get_function(func_id, ec_function); + int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {}; + int num_claimed; + u16 func_type; + u32 *out; + int err; + int i; + + if (nclaimed) + *nclaimed = 0; + + outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); + + mlx5_core_dbg(dev, "func 0x%x, npages %d, outlen %d\n", + func_id, npages, outlen); + err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); + if (err) { + npages = MLX5_GET(manage_pages_in, in, input_num_entries); + dev->priv.reclaim_pages_discard += npages; + } + /* if triggered by FW event and failed by FW then ignore */ + if (event && err == -EREMOTEIO) { + err = 0; + goto out_free; + } + + err = mlx5_cmd_check(dev, err, in, out); + if (err) { + mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); + goto out_free; + } + + num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } + + for (i = 0; i < num_claimed; i++) + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), function); + + if (nclaimed) + *nclaimed = num_claimed; + + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] -= num_claimed; + dev->priv.fw_pages -= num_claimed; + +out_free: + kvfree(out); + return err; +} + +static void pages_work_handler(struct work_struct *work) +{ + struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work); + struct mlx5_core_dev *dev = req->dev; + int err = 0; + + if (req->release_all) + release_all_pages(dev, req->func_id, req->ec_function); + else if (req->npages < 0) + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, + true, req->ec_function); + else if (req->npages > 0) + err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function); + + if (err) + mlx5_core_warn(dev, "%s fail %d\n", + req->npages < 0 ? "reclaim" : "give", err); + + kfree(req); +} + +enum { + EC_FUNCTION_MASK = 0x8000, + RELEASE_ALL_PAGES_MASK = 0x4000, +}; + +static int req_pages_handler(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_pages_req *req; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + bool ec_function; + bool release_all; + u16 func_id; + s32 npages; + + priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb); + dev = container_of(priv, struct mlx5_core_dev, priv); + eqe = data; + + func_id = be16_to_cpu(eqe->data.req_pages.func_id); + npages = be32_to_cpu(eqe->data.req_pages.num_pages); + ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK; + release_all = be16_to_cpu(eqe->data.req_pages.ec_function) & + RELEASE_ALL_PAGES_MASK; + mlx5_core_dbg(dev, "page request for func 0x%x, npages %d, release_all %d\n", + func_id, npages, release_all); + req = kzalloc(sizeof(*req), GFP_ATOMIC); + if (!req) { + mlx5_core_warn(dev, "failed to allocate pages request\n"); + return NOTIFY_DONE; + } + + req->dev = dev; + req->func_id = func_id; + req->npages = npages; + req->ec_function = ec_function; + req->release_all = release_all; + INIT_WORK(&req->work, pages_work_handler); + queue_work(dev->priv.pg_wq, &req->work); + return NOTIFY_OK; +} + +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) +{ + u16 func_id; + s32 npages; + int err; + + err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot); + if (err) + return err; + + mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", + npages, boot ? "boot" : "init", func_id); + + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); +} + +enum { + MLX5_BLKS_FOR_RECLAIM_PAGES = 12 +}; + +static int optimal_reclaimed_pages(void) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_layout *lay; + int ret; + + ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - + MLX5_ST_SZ_BYTES(manage_pages_out)) / + MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); + + return ret; +} + +static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, + struct rb_root *root, u32 function) +{ + u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES)); + unsigned long end = jiffies + recl_pages_to_jiffies; + + while (!RB_EMPTY_ROOT(root)) { + u32 ec_function = mlx5_get_ec_function(function); + u32 function_id = mlx5_get_func_id(function); + int nclaimed; + int err; + + err = reclaim_pages(dev, function_id, optimal_reclaimed_pages(), + &nclaimed, false, ec_function); + if (err) { + mlx5_core_warn(dev, "reclaim_pages err (%d) func_id=0x%x ec_func=0x%x\n", + err, function_id, ec_function); + return err; + } + + if (nclaimed) + end = jiffies + recl_pages_to_jiffies; + + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); + break; + } + } + + return 0; +} + +int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) +{ + struct rb_root *root; + unsigned long id; + void *entry; + + xa_for_each(&dev->priv.page_root_xa, id, entry) { + root = entry; + mlx5_reclaim_root_pages(dev, root, id); + xa_erase(&dev->priv.page_root_xa, id); + kfree(root); + } + + WARN_ON(!xa_empty(&dev->priv.page_root_xa)); + + WARN(dev->priv.fw_pages, + "FW pages counter is %d after reclaiming all pages\n", + dev->priv.fw_pages); + WARN(dev->priv.page_counters[MLX5_VF], + "VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_VF]); + WARN(dev->priv.page_counters[MLX5_HOST_PF], + "External host PF FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_HOST_PF]); + WARN(dev->priv.page_counters[MLX5_EC_VF], + "EC VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_EC_VF]); + + return 0; +} + +int mlx5_pagealloc_init(struct mlx5_core_dev *dev) +{ + INIT_LIST_HEAD(&dev->priv.free_list); + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + xa_init(&dev->priv.page_root_xa); + mlx5_pages_debugfs_init(dev); + + return 0; +} + +void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) +{ + mlx5_pages_debugfs_cleanup(dev); + xa_destroy(&dev->priv.page_root_xa); + destroy_workqueue(dev->priv.pg_wq); +} + +void mlx5_pagealloc_start(struct mlx5_core_dev *dev) +{ + MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST); + mlx5_eq_notifier_register(dev, &dev->priv.pg_nb); +} + +void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) +{ + mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb); + flush_workqueue(dev->priv.pg_wq); +} + +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) +{ + u64 recl_vf_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_VFS_PAGES)); + unsigned long end = jiffies + recl_vf_pages_to_jiffies; + int prev_pages = *pages; + + /* In case of internal error we will free the pages manually later */ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_warn(dev, "Skipping wait for vf pages stage"); + return 0; + } + + mlx5_core_dbg(dev, "Waiting for %d pages\n", prev_pages); + while (*pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); + return -ETIMEDOUT; + } + if (*pages < prev_pages) { + end = jiffies + recl_vf_pages_to_jiffies; + prev_pages = *pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received\n"); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 0000000000..4dcf995cb1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,840 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "pci_irq.h" +#include "lib/sf.h" +#include "lib/eq.h" +#ifdef CONFIG_RFS_ACCEL +#include +#endif + +#define MLX5_SFS_PER_CTRL_IRQ 64 +#define MLX5_IRQ_CTRL_SF_MAX 8 +/* min num of vectors for SFs to be enabled */ +#define MLX5_IRQ_VEC_COMP_BASE_SF 2 + +#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8) +#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX) +#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1) +#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4) + +struct mlx5_irq { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_FORMATTED_NAME]; + struct mlx5_irq_pool *pool; + int refcount; + struct msi_map map; + u32 pool_index; +}; + +struct mlx5_irq_table { + struct mlx5_irq_pool *pcif_pool; + struct mlx5_irq_pool *sf_ctrl_pool; + struct mlx5_irq_pool *sf_comp_pool; +}; + +static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev, + int func, + bool ec_vf_func) +{ + if (!ec_vf_func) + return func; + return mlx5_core_ec_vf_vport_base(dev) + func - 1; +} + +/** + * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors + * to be ssigned to each VF. + * @dev: PF to work on + * @num_vfs: Number of enabled VFs + */ +int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs) +{ + int num_vf_msix, min_msix, max_msix; + + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return 0; + + min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size); + max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size); + + /* Limit maximum number of MSI-X vectors so the default configuration + * has some available in the pool. This will allow the user to increase + * the number of vectors in a VF without having to first size-down other + * VFs. + */ + return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix); +} + +/** + * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF + * @dev: PF to work on + * @function_id: Internal PCI VF function IDd + * @msix_vec_count: Number of MSI-X vectors to set + */ +int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, + int msix_vec_count) +{ + int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *hca_cap = NULL, *query_cap = NULL, *cap; + int num_vf_msix, min_msix, max_msix; + bool ec_vf_function; + int vport; + int ret; + + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return 0; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev)) + return -EOPNOTSUPP; + + min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size); + max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size); + + if (msix_vec_count < min_msix) + return -EINVAL; + + if (msix_vec_count > max_msix) + return -EOVERFLOW; + + query_cap = kvzalloc(query_sz, GFP_KERNEL); + hca_cap = kvzalloc(set_sz, GFP_KERNEL); + if (!hca_cap || !query_cap) { + ret = -ENOMEM; + goto out; + } + + ec_vf_function = mlx5_core_ec_sriov_enabled(dev); + vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function); + ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap); + if (ret) + goto out; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count); + + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id); + + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); +out: + kvfree(hca_cap); + kvfree(query_cap); + return ret; +} + +/* mlx5_system_free_irq - Free an IRQ + * @irq: IRQ to free + * + * Free the IRQ and other resources such as rmap from the system. + * BUT doesn't free or remove reference from mlx5. + * This function is very important for the shutdown flow, where we need to + * cleanup system resoruces but keep mlx5 objects alive, + * see mlx5_irq_table_free_irqs(). + */ +static void mlx5_system_free_irq(struct mlx5_irq *irq) +{ + struct mlx5_irq_pool *pool = irq->pool; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif + + /* free_irq requires that affinity_hint and rmap will be cleared before + * calling it. To satisfy this requirement, we call + * irq_cpu_rmap_remove() to remove the notifier + */ + irq_update_affinity_hint(irq->map.virq, NULL); +#ifdef CONFIG_RFS_ACCEL + rmap = mlx5_eq_table_get_rmap(pool->dev); + if (rmap) + irq_cpu_rmap_remove(rmap, irq->map.virq); +#endif + + free_irq(irq->map.virq, &irq->nh); + if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev)) + pci_msix_free_irq(pool->dev->pdev, irq->map); +} + +static void irq_release(struct mlx5_irq *irq) +{ + struct mlx5_irq_pool *pool = irq->pool; + + xa_erase(&pool->irqs, irq->pool_index); + mlx5_system_free_irq(irq); + free_cpumask_var(irq->mask); + kfree(irq); +} + +int mlx5_irq_put(struct mlx5_irq *irq) +{ + struct mlx5_irq_pool *pool = irq->pool; + int ret = 0; + + mutex_lock(&pool->lock); + irq->refcount--; + if (!irq->refcount) { + irq_release(irq); + ret = 1; + } + mutex_unlock(&pool->lock); + return ret; +} + +int mlx5_irq_read_locked(struct mlx5_irq *irq) +{ + lockdep_assert_held(&irq->pool->lock); + return irq->refcount; +} + +int mlx5_irq_get_locked(struct mlx5_irq *irq) +{ + lockdep_assert_held(&irq->pool->lock); + if (WARN_ON_ONCE(!irq->refcount)) + return 0; + irq->refcount++; + return 1; +} + +static int irq_get(struct mlx5_irq *irq) +{ + int err; + + mutex_lock(&irq->pool->lock); + err = mlx5_irq_get_locked(irq); + mutex_unlock(&irq->pool->lock); + return err; +} + +static irqreturn_t irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) +{ + snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx); +} + +static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) +{ + if (!pool->xa_num_irqs.max) { + /* in case we only have a single irq for the device */ + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx); + return; + } + + if (!vecidx) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx); + return; + } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx); +} + +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap) +{ + struct mlx5_core_dev *dev = pool->dev; + char name[MLX5_MAX_IRQ_NAME]; + struct mlx5_irq *irq; + int err; + + irq = kzalloc(sizeof(*irq), GFP_KERNEL); + if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { + kfree(irq); + return ERR_PTR(-ENOMEM); + } + + if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) { + /* The vector at index 0 is always statically allocated. If + * dynamic irq is not supported all vectors are statically + * allocated. In both cases just get the irq number and set + * the index. + */ + irq->map.virq = pci_irq_vector(dev->pdev, i); + irq->map.index = i; + } else { + irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc); + if (!irq->map.virq) { + err = irq->map.index; + goto err_alloc_irq; + } + } + + if (i && rmap && *rmap) { +#ifdef CONFIG_RFS_ACCEL + err = irq_cpu_rmap_add(*rmap, irq->map.virq); + if (err) + goto err_irq_rmap; +#endif + } + if (!mlx5_irq_pool_is_sf_pool(pool)) + irq_set_name(pool, name, i); + else + irq_sf_set_name(pool, name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME, + MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev)); + err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name, + &irq->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq. err = %d\n", err); + goto err_req_irq; + } + + if (af_desc) { + cpumask_copy(irq->mask, &af_desc->mask); + irq_set_affinity_and_hint(irq->map.virq, irq->mask); + } + irq->pool = pool; + irq->refcount = 1; + irq->pool_index = i; + err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL)); + if (err) { + mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n", + irq->pool_index, err); + goto err_xa; + } + return irq; +err_xa: + if (af_desc) + irq_update_affinity_hint(irq->map.virq, NULL); + free_irq(irq->map.virq, &irq->nh); +err_req_irq: +#ifdef CONFIG_RFS_ACCEL + if (i && rmap && *rmap) { + free_irq_cpu_rmap(*rmap); + *rmap = NULL; + } +err_irq_rmap: +#endif + if (i && pci_msix_can_alloc_dyn(dev->pdev)) + pci_msix_free_irq(dev->pdev, irq->map); +err_alloc_irq: + free_cpumask_var(irq->mask); + kfree(irq); + return ERR_PTR(err); +} + +int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb) +{ + int ret; + + ret = irq_get(irq); + if (!ret) + /* Something very bad happens here, we are enabling EQ + * on non-existing IRQ. + */ + return -ENOENT; + ret = atomic_notifier_chain_register(&irq->nh, nb); + if (ret) + mlx5_irq_put(irq); + return ret; +} + +int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb) +{ + int err = 0; + + err = atomic_notifier_chain_unregister(&irq->nh, nb); + mlx5_irq_put(irq); + return err; +} + +struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq) +{ + return irq->mask; +} + +int mlx5_irq_get_index(struct mlx5_irq *irq) +{ + return irq->map.index; +} + +/* irq_pool API */ + +/* requesting an irq from a given pool according to given index */ +static struct mlx5_irq * +irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap) +{ + struct mlx5_irq *irq; + + mutex_lock(&pool->lock); + irq = xa_load(&pool->irqs, vecidx); + if (irq) { + mlx5_irq_get_locked(irq); + goto unlock; + } + irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap); +unlock: + mutex_unlock(&pool->lock); + return irq; +} + +static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table) +{ + return irq_table->sf_ctrl_pool; +} + +static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) +{ + return irq_table->sf_comp_pool; +} + +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; + + if (mlx5_core_is_sf(dev)) + pool = sf_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pcif_pool; +} + +static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; + + if (mlx5_core_is_sf(dev)) + pool = sf_ctrl_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pcif_pool; +} + +static void _mlx5_irq_release(struct mlx5_irq *irq) +{ + synchronize_irq(irq->map.virq); + mlx5_irq_put(irq); +} + +/** + * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system. + * @ctrl_irq: ctrl IRQ to be released. + */ +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq) +{ + _mlx5_irq_release(ctrl_irq); +} + +/** + * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device. + * @dev: mlx5 device that requesting the IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. + */ +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); + struct irq_affinity_desc af_desc; + struct mlx5_irq *irq; + + cpumask_copy(&af_desc.mask, cpu_online_mask); + af_desc.is_managed = false; + if (!mlx5_irq_pool_is_sf_pool(pool)) { + /* In case we are allocating a control IRQ from a pci device's pool. + * This can happen also for a SF if the SFs pool is empty. + */ + if (!pool->xa_num_irqs.max) { + cpumask_clear(&af_desc.mask); + /* In case we only have a single IRQ for PF/VF */ + cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask); + } + /* Allocate the IRQ in index 0. The vector was already allocated */ + irq = irq_pool_request_vector(pool, 0, &af_desc, NULL); + } else { + irq = mlx5_irq_affinity_request(pool, &af_desc); + } + + return irq; +} + +/** + * mlx5_irq_request - request an IRQ for mlx5 PF/VF device. + * @dev: mlx5 device that requesting the IRQ. + * @vecidx: vector index of the IRQ. This argument is ignore if affinity is + * provided. + * @af_desc: affinity descriptor for this IRQ. + * @rmap: pointer to reverse map pointer for completion interrupts + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. + */ +struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap) +{ + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool; + struct mlx5_irq *irq; + + pool = irq_table->pcif_pool; + irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap); + if (IS_ERR(irq)) + return irq; + mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n", + irq->map.virq, cpumask_pr_args(&af_desc->mask), + irq->refcount / MLX5_EQ_REFS_PER_IRQ); + return irq; +} + +/** + * mlx5_msix_alloc - allocate msix interrupt + * @dev: mlx5 device from which to request + * @handler: interrupt handler + * @affdesc: affinity descriptor + * @name: interrupt name + * + * Returns: struct msi_map with result encoded. + * Note: the caller must make sure to release the irq by calling + * mlx5_msix_free() if shutdown was initiated. + */ +struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev, + irqreturn_t (*handler)(int, void *), + const struct irq_affinity_desc *affdesc, + const char *name) +{ + struct msi_map map; + int err; + + if (!dev->pdev) { + map.virq = 0; + map.index = -EINVAL; + return map; + } + + map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc); + if (!map.virq) + return map; + + err = request_irq(map.virq, handler, 0, name, NULL); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + pci_msix_free_irq(dev->pdev, map); + map.virq = 0; + map.index = -ENOMEM; + } + return map; +} +EXPORT_SYMBOL(mlx5_msix_alloc); + +/** + * mlx5_msix_free - free a previously allocated msix interrupt + * @dev: mlx5 device associated with interrupt + * @map: map previously returned by mlx5_msix_alloc() + */ +void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map) +{ + free_irq(map.virq, NULL); + pci_msix_free_irq(dev->pdev, map); +} +EXPORT_SYMBOL(mlx5_msix_free); + +/** + * mlx5_irq_release_vector - release one IRQ back to the system. + * @irq: the irq to release. + */ +void mlx5_irq_release_vector(struct mlx5_irq *irq) +{ + _mlx5_irq_release(irq); +} + +/** + * mlx5_irq_request_vector - request one IRQ for mlx5 device. + * @dev: mlx5 device that is requesting the IRQ. + * @cpu: CPU to bind the IRQ to. + * @vecidx: vector index to request an IRQ for. + * @rmap: pointer to reverse map pointer for completion interrupts + * + * Each IRQ is bound to at most 1 CPU. + * This function is requests one IRQ, for the given @vecidx. + * + * This function returns a pointer to the irq on success, or an error pointer + * in case of an error. + */ +struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, + u16 vecidx, struct cpu_rmap **rmap) +{ + struct mlx5_irq_table *table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = table->pcif_pool; + struct irq_affinity_desc af_desc; + int offset = 1; + + if (!pool->xa_num_irqs.max) + offset = 0; + + af_desc.is_managed = false; + cpumask_clear(&af_desc.mask); + cpumask_set_cpu(cpu, &af_desc.mask); + return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap); +} + +static struct mlx5_irq_pool * +irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, + u32 min_threshold, u32 max_threshold) +{ + struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + + if (!pool) + return ERR_PTR(-ENOMEM); + pool->dev = dev; + mutex_init(&pool->lock); + xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC); + pool->xa_num_irqs.min = start; + pool->xa_num_irqs.max = start + size - 1; + if (name) + snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS, + "%s", name); + pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; + pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; + mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", + name, size, start); + return pool; +} + +static void irq_pool_free(struct mlx5_irq_pool *pool) +{ + struct mlx5_irq *irq; + unsigned long index; + + /* There are cases in which we are destrying the irq_table before + * freeing all the IRQs, fast teardown for example. Hence, free the irqs + * which might not have been freed. + */ + xa_for_each(&pool->irqs, index, irq) + irq_release(irq); + xa_destroy(&pool->irqs); + mutex_destroy(&pool->lock); + kfree(pool->irqs_per_cpu); + kvfree(pool); +} + +static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int num_sf_ctrl_by_msix; + int num_sf_ctrl_by_sfs; + int num_sf_ctrl; + int err; + + /* init pcif_pool */ + table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL, + MLX5_EQ_SHARE_IRQ_MIN_COMP, + MLX5_EQ_SHARE_IRQ_MAX_COMP); + if (IS_ERR(table->pcif_pool)) + return PTR_ERR(table->pcif_pool); + if (!mlx5_sf_max_functions(dev)) + return 0; + if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) { + mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n"); + return 0; + } + + /* init sf_ctrl_pool */ + num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF); + num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev), + MLX5_SFS_PER_CTRL_IRQ); + num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs); + num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl); + table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl, + "mlx5_sf_ctrl", + MLX5_EQ_SHARE_IRQ_MIN_CTRL, + MLX5_EQ_SHARE_IRQ_MAX_CTRL); + if (IS_ERR(table->sf_ctrl_pool)) { + err = PTR_ERR(table->sf_ctrl_pool); + goto err_pf; + } + /* init sf_comp_pool */ + table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl, + sf_vec - num_sf_ctrl, "mlx5_sf_comp", + MLX5_EQ_SHARE_IRQ_MIN_COMP, + MLX5_EQ_SHARE_IRQ_MAX_COMP); + if (IS_ERR(table->sf_comp_pool)) { + err = PTR_ERR(table->sf_comp_pool); + goto err_sf_ctrl; + } + + table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL); + if (!table->sf_comp_pool->irqs_per_cpu) { + err = -ENOMEM; + goto err_irqs_per_cpu; + } + + return 0; + +err_irqs_per_cpu: + irq_pool_free(table->sf_comp_pool); +err_sf_ctrl: + irq_pool_free(table->sf_ctrl_pool); +err_pf: + irq_pool_free(table->pcif_pool); + return err; +} + +static void irq_pools_destroy(struct mlx5_irq_table *table) +{ + if (table->sf_ctrl_pool) { + irq_pool_free(table->sf_comp_pool); + irq_pool_free(table->sf_ctrl_pool); + } + irq_pool_free(table->pcif_pool); +} + +static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool) +{ + struct mlx5_irq *irq; + unsigned long index; + + xa_for_each(&pool->irqs, index, irq) + mlx5_system_free_irq(irq); + +} + +static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table) +{ + if (table->sf_ctrl_pool) { + mlx5_irq_pool_free_irqs(table->sf_comp_pool); + mlx5_irq_pool_free_irqs(table->sf_ctrl_pool); + } + mlx5_irq_pool_free_irqs(table->pcif_pool); +} + +/* irq_table API */ + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + if (mlx5_core_is_sf(dev)) + return 0; + + irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL, + dev->priv.numa_node); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + if (mlx5_core_is_sf(dev)) + return; + + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table) +{ + if (!table->pcif_pool->xa_num_irqs.max) + return 1; + return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min; +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int total_vec; + int pcif_vec; + int req_vec; + int err; + int n; + + if (mlx5_core_is_sf(dev)) + return 0; + + pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1; + pcif_vec = min_t(int, pcif_vec, num_eqs); + + total_vec = pcif_vec; + if (mlx5_sf_max_functions(dev)) + total_vec += MLX5_IRQ_CTRL_SF_MAX + + MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev); + total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev)); + pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev)); + + req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec; + n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX); + if (n < 0) + return n; + + err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec); + if (err) + pci_free_irq_vectors(dev->pdev); + + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + + if (mlx5_core_is_sf(dev)) + return; + + /* There are cases where IRQs still will be in used when we reaching + * to here. Hence, making sure all the irqs are released. + */ + irq_pools_destroy(table); + pci_free_irq_vectors(dev->pdev); +} + +void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + + if (mlx5_core_is_sf(dev)) + return; + + mlx5_irq_pools_free_irqs(table); + pci_free_irq_vectors(dev->pdev); +} + +int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table) +{ + if (table->sf_comp_pool) + return min_t(int, num_online_cpus(), + table->sf_comp_pool->xa_num_irqs.max - + table->sf_comp_pool->xa_num_irqs.min + 1); + else + return mlx5_irq_table_get_num_comp(table); +} + +struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(dev)) + return dev->priv.parent_mdev->priv.irq_table; +#endif + return dev->priv.irq_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h new file mode 100644 index 0000000000..c4d377f8df --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __PCI_IRQ_H__ +#define __PCI_IRQ_H__ + +#include + +#define MLX5_MAX_IRQ_NAME (32) +#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s") +#define MLX5_MAX_IRQ_FORMATTED_NAME \ + (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR)) +/* max irq_index is 2047, so four chars */ +#define MLX5_MAX_IRQ_IDX_CHARS (4) +#define MLX5_EQ_REFS_PER_IRQ (2) + +struct mlx5_irq; +struct cpu_rmap; + +struct mlx5_irq_pool { + char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; + struct xa_limit xa_num_irqs; + struct mutex lock; /* sync IRQs creations */ + struct xarray irqs; + u32 max_threshold; + u32 min_threshold; + u16 *irqs_per_cpu; + struct mlx5_core_dev *dev; +}; + +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); +static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) +{ + return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); +} + +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + struct irq_affinity_desc *af_desc, + struct cpu_rmap **rmap); +int mlx5_irq_get_locked(struct mlx5_irq *irq); +int mlx5_irq_read_locked(struct mlx5_irq *irq); +int mlx5_irq_put(struct mlx5_irq *irq); + +#endif /* __PCI_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c new file mode 100644 index 0000000000..ee5ffdeb90 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_core.h" + +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + int err; + + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + err = mlx5_cmd_exec_inout(dev, alloc_pd, in, out); + if (!err) + *pdn = MLX5_GET(alloc_pd_out, out, pd); + return err; +} +EXPORT_SYMBOL(mlx5_core_alloc_pd); + +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; + + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, pd, pdn); + return mlx5_cmd_exec_in(dev, dealloc_pd, in); +} +EXPORT_SYMBOL(mlx5_core_dealloc_pd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c new file mode 100644 index 0000000000..be70d1f23a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -0,0 +1,1207 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "mlx5_core.h" + +/* calling with verbose false will not print error to log */ +int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, + void *data_out, int size_out, u16 reg_id, int arg, + int write, bool verbose) +{ + int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; + int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; + int err = -ENOMEM; + u32 *out = NULL; + u32 *in = NULL; + void *data; + + in = kvzalloc(inlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); + if (!in || !out) + goto out; + + data = MLX5_ADDR_OF(access_register_in, in, register_data); + memcpy(data, data_in, size_in); + + MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG); + MLX5_SET(access_register_in, in, op_mod, !write); + MLX5_SET(access_register_in, in, argument, arg); + MLX5_SET(access_register_in, in, register_id, reg_id); + + err = mlx5_cmd_do(dev, in, inlen, out, outlen); + if (verbose) + err = mlx5_cmd_check(dev, err, in, out); + if (err) + goto out; + + data = MLX5_ADDR_OF(access_register_out, out, register_data); + memcpy(data_out, data, size_out); + +out: + kvfree(out); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_access_reg); + +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_id, int arg, int write) +{ + return mlx5_access_reg(dev, data_in, size_in, data_out, size_out, + reg_id, arg, write, true); +} +EXPORT_SYMBOL_GPL(mlx5_core_access_reg); + +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(pcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(pcam_reg); + + MLX5_SET(pcam_reg, in, feature_group, feature_group); + MLX5_SET(pcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, pcam, sz, MLX5_REG_PCAM, 0, 0); +} + +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(mcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mcam_reg); + + MLX5_SET(mcam_reg, in, feature_group, feature_group); + MLX5_SET(mcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0); +} + +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(qcam_reg); + + MLX5_SET(qcam_reg, in, feature_group, feature_group); + MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0); +} + +struct mlx5_reg_pcap { + u8 rsvd0; + u8 port_num; + u8 rsvd1[2]; + __be32 caps_127_96; + __be32 caps_95_64; + __be32 caps_63_32; + __be32 caps_31_0; +}; + +int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) +{ + struct mlx5_reg_pcap in; + struct mlx5_reg_pcap out; + + memset(&in, 0, sizeof(in)); + in.caps_127_96 = cpu_to_be32(caps); + in.port_num = port_num; + + return mlx5_core_access_reg(dev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_PCAP, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_caps); + +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + + MLX5_SET(ptys_reg, in, local_port, local_port); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + return mlx5_core_access_reg(dev, in, sizeof(in), ptys, + ptys_size, MLX5_REG_PTYS, 0, 0); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); + +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) +{ + u32 in[MLX5_ST_SZ_DW(mlcr_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; + + MLX5_SET(mlcr_reg, in, local_port, 1); + MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MLCR, 0, 1); +} + +int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, + u16 *proto_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, + local_port); + if (err) + return err; + + *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} +EXPORT_SYMBOL(mlx5_query_ib_port_oper); + +/* This function should be used after setting a port register only */ +void mlx5_toggle_port_link(struct mlx5_core_dev *dev) +{ + enum mlx5_port_status ps; + + mlx5_query_port_admin_status(dev, &ps); + mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(dev, MLX5_PORT_UP); +} +EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); + +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + + MLX5_SET(paos_reg, in, local_port, 1); + MLX5_SET(paos_reg, in, admin_status, status); + MLX5_SET(paos_reg, in, ase, 1); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); + +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + int err; + + MLX5_SET(paos_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 0); + if (err) + return err; + *status = MLX5_GET(paos_reg, out, admin_status); + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); + +static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, + u16 *max_mtu, u16 *oper_mtu, u8 port) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + MLX5_SET(pmtu_reg, in, local_port, port); + mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 0); + + if (max_mtu) + *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu); + if (oper_mtu) + *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu); + if (admin_mtu) + *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); +} + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + MLX5_SET(pmtu_reg, in, admin_mtu, mtu); + MLX5_SET(pmtu_reg, in, local_port, port); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); + +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, + u8 port) +{ + mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); + +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, + u8 port) +{ + mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); + +int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) +{ + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; + int err; + + MLX5_SET(pmlp_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return err; + + *module_num = MLX5_GET(lane_2_module_mapping, + MLX5_ADDR_OF(pmlp_reg, out, lane0_module_mapping), + module); + + return 0; +} + +static int mlx5_query_module_id(struct mlx5_core_dev *dev, int module_num, + u8 *module_id) +{ + u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + int err, status; + u8 *ptr; + + MLX5_SET(mcia_reg, in, i2c_device_address, MLX5_I2C_ADDR_LOW); + MLX5_SET(mcia_reg, in, module, module_num); + MLX5_SET(mcia_reg, in, device_address, 0); + MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, size, 1); + MLX5_SET(mcia_reg, in, l, 0); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + + *module_id = ptr[0]; + + return 0; +} + +static int mlx5_qsfp_eeprom_page(u16 offset) +{ + if (offset < MLX5_EEPROM_PAGE_LENGTH) + /* Addresses between 0-255 - page 00 */ + return 0; + + /* Addresses between 256 - 639 belongs to pages 01, 02 and 03 + * For example, offset = 400 belongs to page 02: + * 1 + ((400 - 256)/128) = 2 + */ + return 1 + ((offset - MLX5_EEPROM_PAGE_LENGTH) / + MLX5_EEPROM_HIGH_PAGE_LENGTH); +} + +static int mlx5_qsfp_eeprom_high_page_offset(int page_num) +{ + if (!page_num) /* Page 0 always start from low page */ + return 0; + + /* High page */ + return page_num * MLX5_EEPROM_HIGH_PAGE_LENGTH; +} + +static void mlx5_qsfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset) +{ + *i2c_addr = MLX5_I2C_ADDR_LOW; + *page_num = mlx5_qsfp_eeprom_page(*offset); + *offset -= mlx5_qsfp_eeprom_high_page_offset(*page_num); +} + +static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset) +{ + *i2c_addr = MLX5_I2C_ADDR_LOW; + *page_num = 0; + + if (*offset < MLX5_EEPROM_PAGE_LENGTH) + return; + + *i2c_addr = MLX5_I2C_ADDR_HIGH; + *offset -= MLX5_EEPROM_PAGE_LENGTH; +} + +static int mlx5_mcia_max_bytes(struct mlx5_core_dev *dev) +{ + /* mcia supports either 12 dwords or 32 dwords */ + return (MLX5_CAP_MCAM_FEATURE(dev, mcia_32dwords) ? 32 : 12) * sizeof(u32); +} + +static int mlx5_query_mcia(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, u8 *data) +{ + u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + int status, err; + void *ptr; + u16 size; + + size = min_t(int, params->size, mlx5_mcia_max_bytes(dev)); + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, size, size); + MLX5_SET(mcia_reg, in, module, params->module_number); + MLX5_SET(mcia_reg, in, device_address, params->offset); + MLX5_SET(mcia_reg, in, page_number, params->page); + MLX5_SET(mcia_reg, in, i2c_device_address, params->i2c_address); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + memcpy(data, ptr, size); + + return size; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data) +{ + struct mlx5_module_eeprom_query_params query = {0}; + u8 module_id; + int err; + + err = mlx5_query_module_num(dev, &query.module_number); + if (err) + return err; + + err = mlx5_query_module_id(dev, query.module_number, &module_id); + if (err) + return err; + + switch (module_id) { + case MLX5_MODULE_ID_SFP: + mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); + break; + case MLX5_MODULE_ID_QSFP: + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); + break; + default: + mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); + return -EINVAL; + } + + if (offset + size > MLX5_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size = MLX5_EEPROM_PAGE_LENGTH - offset; + + query.size = size; + query.offset = offset; + + return mlx5_query_mcia(dev, &query, data); +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); + +int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, + u8 *data) +{ + int err; + + err = mlx5_query_module_num(dev, ¶ms->module_number); + if (err) + return err; + + if (params->i2c_address != MLX5_I2C_ADDR_HIGH && + params->i2c_address != MLX5_I2C_ADDR_LOW) { + mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address); + return -EINVAL; + } + + return mlx5_query_mcia(dev, params, data); +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page); + +static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, + int pvlc_size, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0}; + + MLX5_SET(pvlc_reg, in, local_port, local_port); + return mlx5_core_access_reg(dev, in, sizeof(in), pvlc, + pvlc_size, MLX5_REG_PVLC, 0, 0); +} + +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(pvlc_reg)]; + int err; + + err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port); + if (err) + return err; + + *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); + +static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out, + u32 out_size) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + + MLX5_SET(pfcc_reg, in, local_port, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + out_size, MLX5_REG_PFCC, 0, 0); +} + +int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx, tx_pause); + MLX5_SET(pfcc_reg, in, pprx, rx_pause); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pause); + +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (rx_pause) + *rx_pause = MLX5_GET(pfcc_reg, out, pprx); + + if (tx_pause) + *tx_pause = MLX5_GET(pfcc_reg, out, pptx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pause); + +int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, + u16 stall_critical_watermark, + u16 stall_minor_watermark) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pptx_mask_n, 1); + MLX5_SET(pfcc_reg, in, pprx_mask_n, 1); + MLX5_SET(pfcc_reg, in, ppan_mask_n, 1); + MLX5_SET(pfcc_reg, in, critical_stall_mask, 1); + MLX5_SET(pfcc_reg, in, minor_stall_mask, 1); + MLX5_SET(pfcc_reg, in, device_stall_critical_watermark, + stall_critical_watermark); + MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} + +int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, + u16 *stall_critical_watermark, + u16 *stall_minor_watermark) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (stall_critical_watermark) + *stall_critical_watermark = MLX5_GET(pfcc_reg, out, + device_stall_critical_watermark); + + if (stall_minor_watermark) + *stall_minor_watermark = MLX5_GET(pfcc_reg, out, + device_stall_minor_watermark); + + return 0; +} + +int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) +{ + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + + MLX5_SET(pfcc_reg, in, local_port, 1); + MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx); + MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx); + MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PFCC, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); + +int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; + int err; + + err = mlx5_query_pfcc_reg(dev, out, sizeof(out)); + if (err) + return err; + + if (pfc_en_tx) + *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx); + + if (pfc_en_rx) + *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); + +int mlx5_max_tc(struct mlx5_core_dev *mdev) +{ + u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; + + return num_tc - 1; +} + +int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0}; + + MLX5_SET(dcbx_param, in, port_number, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0); +} + +int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in) +{ + u32 out[MLX5_ST_SZ_DW(dcbx_param)]; + + MLX5_SET(dcbx_param, in, port_number, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(out), out, + sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1); +} + +int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0}; + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + int err; + int i; + + for (i = 0; i < 8; i++) { + if (prio_tc[i] > mlx5_max_tc(mdev)) + return -EINVAL; + + MLX5_SET(qtct_reg, in, prio, i); + MLX5_SET(qtct_reg, in, tclass, prio_tc[i]); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QTCT, 0, 1); + if (err) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc); + +int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, + u8 prio, u8 *tc) +{ + u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(qtct_reg, in, port_number, 1); + MLX5_SET(qtct_reg, in, prio, prio); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QTCT, 0, 0); + if (!err) + *tc = MLX5_GET(qtct_reg, out, tclass); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc); + +static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out), + MLX5_REG_QETCR, 0, 1); +} + +static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -EOPNOTSUPP; + + memset(in, 0, sizeof(in)); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, + MLX5_REG_QETCR, 0, 0); +} + +int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + int i; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); + +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + group); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group); + +int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + int i; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1); + MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc); + +int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, + u8 tc, u8 *bw_pct) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *bw_pct = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + bw_allocation); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc); + +int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; + void *ets_tcn_conf; + int i; + + MLX5_SET(qetc_reg, in, port_number, 1); + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]); + + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units, + max_bw_units[i]); + MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value, + max_bw_value[i]); + } + + return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); +} +EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit); + +int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_units) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + int i; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + for (i = 0; i <= mlx5_max_tc(mdev); i++) { + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]); + + max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_value); + max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + max_bw_units); + } + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); + +int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) +{ + u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {}; + + MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL); + MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1); + MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); + return mlx5_cmd_exec_in(mdev, set_wol_rol, in); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_wol); + +int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) +{ + u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {}; + int err; + + MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL); + err = mlx5_cmd_exec_inout(mdev, query_wol_rol, in, out); + if (!err) + *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_wol); + +int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + + MLX5_SET(pcmr_reg, in, local_port, 1); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + outlen, MLX5_REG_PCMR, 0, 0); +} + +int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + + return mlx5_core_access_reg(mdev, in, inlen, out, + sizeof(out), MLX5_REG_PCMR, 0, 1); +} + +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, fcs_chk, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + *supported = false; + *enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap)); + *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); +} + +int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps, + mtpps_size, MLX5_REG_MTPPS, 0, 0); +} + +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, mtpps, mtpps_size, out, + sizeof(out), MLX5_REG_MTPPS, 0, 1); +} + +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + int err = 0; + + MLX5_SET(mtppse_reg, in, pin, pin); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 0); + if (err) + return err; + + *arm = MLX5_GET(mtppse_reg, in, event_arm); + *mode = MLX5_GET(mtppse_reg, in, event_generation_mode); + + return err; +} + +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + + MLX5_SET(mtppse_reg, in, pin, pin); + MLX5_SET(mtppse_reg, in, event_arm, arm); + MLX5_SET(mtppse_reg, in, event_generation_mode, mode); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 1); +} + +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + MLX5_SET(qpts_reg, in, trust_state, trust_state); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 1); + return err; +} + +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 0); + if (!err) + *trust_state = MLX5_GET(qpts_reg, out, trust_state); + + return err; +} + +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(qpdpm_reg, in, local_port, 1); + + /* Update the corresponding dscp entry */ + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1); + +out: + kfree(in); + kfree(out); + return err; +} + +/* dscp2prio[i]: priority that dscp i mapped to */ +#define MLX5E_SUPPORTED_DSCP 64 +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + int i; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) { + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]); + dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio); + } + +out: + kfree(in); + kfree(out); + return err; +} + +/* speed in units of 1Mb */ +static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = 1000, + [MLX5E_1000BASE_KX] = 1000, + [MLX5E_10GBASE_CX4] = 10000, + [MLX5E_10GBASE_KX4] = 10000, + [MLX5E_10GBASE_KR] = 10000, + [MLX5E_20GBASE_KR2] = 20000, + [MLX5E_40GBASE_CR4] = 40000, + [MLX5E_40GBASE_KR4] = 40000, + [MLX5E_56GBASE_R4] = 56000, + [MLX5E_10GBASE_CR] = 10000, + [MLX5E_10GBASE_SR] = 10000, + [MLX5E_10GBASE_ER] = 10000, + [MLX5E_40GBASE_SR4] = 40000, + [MLX5E_40GBASE_LR4] = 40000, + [MLX5E_50GBASE_SR2] = 50000, + [MLX5E_100GBASE_CR4] = 100000, + [MLX5E_100GBASE_SR4] = 100000, + [MLX5E_100GBASE_KR4] = 100000, + [MLX5E_100GBASE_LR4] = 100000, + [MLX5E_100BASE_TX] = 100, + [MLX5E_1000BASE_T] = 1000, + [MLX5E_10GBASE_T] = 10000, + [MLX5E_25GBASE_CR] = 25000, + [MLX5E_25GBASE_KR] = 25000, + [MLX5E_25GBASE_SR] = 25000, + [MLX5E_50GBASE_CR2] = 50000, + [MLX5E_50GBASE_KR2] = 50000, +}; + +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, + [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, + [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, + [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); + return 0; +} + +bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev) +{ + struct mlx5_port_eth_proto eproto; + int err; + + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet)) + return true; + + err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto); + if (err) + return false; + + return !!eproto.cap; +} + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size, + bool force_legacy) +{ + bool ext = force_legacy ? false : mlx5_ptys_ext_supported(mdev); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; +} + +u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, + bool force_legacy) +{ + unsigned long temp = eth_proto_oper; + const u32 *table; + u32 speed = 0; + u32 max_size; + int i; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; + return speed; +} + +u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, + bool force_legacy) +{ + u32 link_modes = 0; + const u32 *table; + u32 max_size; + int i; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) + link_modes |= MLX5E_PROT_MASK(i); + } + return link_modes; +} + +int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + struct mlx5_port_eth_proto eproto; + u32 max_speed = 0; + const u32 *table; + u32 max_size; + bool ext; + int err; + int i; + + ext = mlx5_ptys_ext_supported(mdev); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); + if (err) + return err; + + mlx5e_port_get_speed_arr(mdev, &table, &max_size, false); + for (i = 0; i < max_size; ++i) + if (eproto.cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, table[i]); + + *speed = max_speed; + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c new file mode 100644 index 0000000000..8bce730b5c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "qos.h" + +#define MLX5_QOS_DEFAULT_DWRR_UID 0 + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, qos)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_bw_share)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_rate_limit)) + return false; + return true; +} + +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group); +} + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + void *attr; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id) +{ + return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id); +} + +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, + u32 bw_share, u32 max_avg_bw, u32 id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + u32 bitmask = 0; + + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id, bitmask); +} + +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id) +{ + return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h new file mode 100644 index 0000000000..624ce822b7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_QOS_H +#define __MLX5_QOS_H + +#include "mlx5_core.h" + +#define MLX5_DEBUG_QOS_MASK BIT(4) + +#define qos_err(mdev, fmt, ...) \ + mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_warn(mdev, fmt, ...) \ + mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_dbg(mdev, fmt, ...) \ + mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__) + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev); +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id); +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 bw_share, + u32 max_avg_bw, u32 id); +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c new file mode 100644 index 0000000000..a42f6cd99b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include +#include +#include + +#include "lib/mlx5.h" +#include "eswitch.h" +#include "fs_core.h" +#include "rdma.h" + +static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev) +{ + struct mlx5_core_roce *roce = &dev->priv.roce; + + mlx5_del_flow_rules(roce->allow_rule); + mlx5_destroy_flow_group(roce->fg); + mlx5_destroy_flow_table(roce->ft); +} + +static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_roce *roce = &dev->priv.roce; + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_eswitch *esw; + u32 *flow_group_in; + int err; + + if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && + MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain))) + return -EOPNOTSUPP; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + kvfree(flow_group_in); + return -ENOMEM; + } + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL); + if (!ns) { + mlx5_core_err(dev, "Failed to get RDMA RX namespace"); + err = -EOPNOTSUPP; + goto free; + } + + ft_attr.max_fte = 1; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + mlx5_core_err(dev, "Failed to create RDMA RX flow table"); + err = PTR_ERR(ft); + goto free; + } + + esw = dev->priv.eswitch; + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); + + fg = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err); + goto destroy_flow_table; + } + + mlx5_esw_set_spec_source_port(esw, esw->manager_vport, spec); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n", + err); + goto destroy_flow_group; + } + + kvfree(spec); + kvfree(flow_group_in); + roce->ft = ft; + roce->fg = fg; + roce->allow_rule = flow_rule; + + return 0; + +destroy_flow_group: + mlx5_destroy_flow_group(fg); +destroy_flow_table: + mlx5_destroy_flow_table(ft); +free: + kvfree(spec); + kvfree(flow_group_in); + return err; +} + +static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) +{ + mlx5_core_roce_gid_set(dev, 0, MLX5_ROCE_VERSION_2, 0, + NULL, NULL, false, 0, 1); +} + +static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid) +{ + u8 hw_id[ETH_ALEN]; + + mlx5_query_mac_address(dev, hw_id); + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + addrconf_addr_eui48(&gid->raw[8], hw_id); +} + +static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) +{ + union ib_gid gid; + u8 mac[ETH_ALEN]; + + mlx5_rdma_make_default_gid(dev, &gid); + return mlx5_core_roce_gid_set(dev, 0, + MLX5_ROCE_VERSION_2, + 0, gid.raw, mac, + false, 0, 1); +} + +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) +{ + struct mlx5_core_roce *roce = &dev->priv.roce; + + if (!roce->ft) + return; + + mlx5_rdma_disable_roce_steering(dev); + mlx5_rdma_del_roce_addr(dev); + mlx5_nic_vport_disable_roce(dev); +} + +void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +{ + int err; + + if (!MLX5_CAP_GEN(dev, roce)) + return; + + err = mlx5_nic_vport_enable_roce(dev); + if (err) { + mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); + return; + } + + err = mlx5_rdma_add_roce_addr(dev); + if (err) { + mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err); + goto disable_roce; + } + + err = mlx5_rdma_enable_roce_steering(dev); + if (err) { + mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err); + goto del_roce_addr; + } + + return; + +del_roce_addr: + mlx5_rdma_del_roce_addr(dev); +disable_roce: + mlx5_nic_vport_disable_roce(dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h new file mode 100644 index 0000000000..750cff2a71 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_RDMA_H__ +#define __MLX5_RDMA_H__ + +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} + +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_RDMA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c new file mode 100644 index 0000000000..9f8b4005f4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_core.h" + +/* Scheduling element fw management */ +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 *element_id) +{ + u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {}; + u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {}; + void *schedc; + int err; + + schedc = MLX5_ADDR_OF(create_scheduling_element_in, in, + scheduling_context); + MLX5_SET(create_scheduling_element_in, in, opcode, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT); + MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + err = mlx5_cmd_exec_inout(dev, create_scheduling_element, in, out); + if (err) + return err; + + *element_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + return 0; +} + +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 element_id, + u32 modify_bitmask) +{ + u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {}; + void *schedc; + + schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in, + scheduling_context); + MLX5_SET(modify_scheduling_element_in, in, opcode, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT); + MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(modify_scheduling_element_in, in, modify_bitmask, + modify_bitmask); + MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + return mlx5_cmd_exec_in(dev, modify_scheduling_element, in); +} + +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {}; + + MLX5_SET(destroy_scheduling_element_in, in, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + + return mlx5_cmd_exec_in(dev, destroy_scheduling_element, in); +} + +static bool mlx5_rl_are_equal_raw(struct mlx5_rl_entry *entry, void *rl_in, + u16 uid) +{ + return (!memcmp(entry->rl_raw, rl_in, sizeof(entry->rl_raw)) && + entry->uid == uid); +} + +/* Finds an entry where we can register the given rate + * If the rate already exists, return the entry where it is registered, + * otherwise return the first available entry. + * If the table is full, return NULL + */ +static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, + void *rl_in, u16 uid, bool dedicated) +{ + struct mlx5_rl_entry *ret_entry = NULL; + bool empty_found = false; + int i; + + lockdep_assert_held(&table->rl_lock); + WARN_ON(!table->rl_entry); + + for (i = 0; i < table->max_size; i++) { + if (dedicated) { + if (!table->rl_entry[i].refcount) + return &table->rl_entry[i]; + continue; + } + + if (table->rl_entry[i].refcount) { + if (table->rl_entry[i].dedicated) + continue; + if (mlx5_rl_are_equal_raw(&table->rl_entry[i], rl_in, + uid)) + return &table->rl_entry[i]; + } else if (!empty_found) { + empty_found = true; + ret_entry = &table->rl_entry[i]; + } + } + + return ret_entry; +} + +static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, + struct mlx5_rl_entry *entry, bool set) +{ + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {}; + void *pp_context; + + pp_context = MLX5_ADDR_OF(set_pp_rate_limit_in, in, ctx); + MLX5_SET(set_pp_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_PP_RATE_LIMIT); + MLX5_SET(set_pp_rate_limit_in, in, uid, entry->uid); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, entry->index); + if (set) + memcpy(pp_context, entry->rl_raw, sizeof(entry->rl_raw)); + return mlx5_cmd_exec_in(dev, set_pp_rate_limit, in); +} + +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + return (rate <= table->max_rate && rate >= table->min_rate); +} +EXPORT_SYMBOL(mlx5_rl_is_in_range); + +bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, + struct mlx5_rate_limit *rl_1) +{ + return ((rl_0->rate == rl_1->rate) && + (rl_0->max_burst_sz == rl_1->max_burst_sz) && + (rl_0->typical_pkt_sz == rl_1->typical_pkt_sz)); +} +EXPORT_SYMBOL(mlx5_rl_are_equal); + +static int mlx5_rl_table_get(struct mlx5_rl_table *table) +{ + int i; + + lockdep_assert_held(&table->rl_lock); + + if (table->rl_entry) { + table->refcount++; + return 0; + } + + table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry), + GFP_KERNEL); + if (!table->rl_entry) + return -ENOMEM; + + /* The index represents the index in HW rate limit table + * Index 0 is reserved for unlimited rate + */ + for (i = 0; i < table->max_size; i++) + table->rl_entry[i].index = i + 1; + + table->refcount++; + return 0; +} + +static void mlx5_rl_table_put(struct mlx5_rl_table *table) +{ + lockdep_assert_held(&table->rl_lock); + if (--table->refcount) + return; + + kfree(table->rl_entry); + table->rl_entry = NULL; +} + +static void mlx5_rl_table_free(struct mlx5_core_dev *dev, struct mlx5_rl_table *table) +{ + int i; + + if (!table->rl_entry) + return; + + /* Clear all configured rates */ + for (i = 0; i < table->max_size; i++) + if (table->rl_entry[i].refcount) + mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], false); + kfree(table->rl_entry); +} + +static void mlx5_rl_entry_get(struct mlx5_rl_entry *entry) +{ + entry->refcount++; +} + +static void +mlx5_rl_entry_put(struct mlx5_core_dev *dev, struct mlx5_rl_entry *entry) +{ + entry->refcount--; + if (!entry->refcount) + mlx5_set_pp_rate_limit_cmd(dev, entry, false); +} + +int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid, + bool dedicated_entry, u16 *index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + u32 rate; + int err; + + if (!table->max_size) + return -EOPNOTSUPP; + + rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit); + if (!rate || !mlx5_rl_is_in_range(dev, rate)) { + mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", + rate, table->min_rate, table->max_rate); + return -EINVAL; + } + + mutex_lock(&table->rl_lock); + err = mlx5_rl_table_get(table); + if (err) + goto out; + + entry = find_rl_entry(table, rl_in, uid, dedicated_entry); + if (!entry) { + mlx5_core_err(dev, "Max number of %u rates reached\n", + table->max_size); + err = -ENOSPC; + goto rl_err; + } + if (!entry->refcount) { + /* new rate limit */ + memcpy(entry->rl_raw, rl_in, sizeof(entry->rl_raw)); + entry->uid = uid; + err = mlx5_set_pp_rate_limit_cmd(dev, entry, true); + if (err) { + mlx5_core_err( + dev, + "Failed configuring rate limit(err %d): rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, rate, + MLX5_GET(set_pp_rate_limit_context, rl_in, + burst_upper_bound), + MLX5_GET(set_pp_rate_limit_context, rl_in, + typical_packet_size)); + goto rl_err; + } + + entry->dedicated = dedicated_entry; + } + mlx5_rl_entry_get(entry); + *index = entry->index; + mutex_unlock(&table->rl_lock); + return 0; + +rl_err: + mlx5_rl_table_put(table); +out: + mutex_unlock(&table->rl_lock); + return err; +} +EXPORT_SYMBOL(mlx5_rl_add_rate_raw); + +void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + + mutex_lock(&table->rl_lock); + entry = &table->rl_entry[index - 1]; + mlx5_rl_entry_put(dev, entry); + mlx5_rl_table_put(table); + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate_raw); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index, + struct mlx5_rate_limit *rl) +{ + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; + + MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound, + rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size, + rl->typical_pkt_sz); + + return mlx5_rl_add_rate_raw(dev, rl_raw, + MLX5_CAP_QOS(dev, packet_pacing_uid) ? + MLX5_SHARED_RESOURCE_UID : 0, + false, index); +} +EXPORT_SYMBOL(mlx5_rl_add_rate); + +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) +{ + u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {}; + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry = NULL; + + /* 0 is a reserved value for unlimited rate */ + if (rl->rate == 0) + return; + + MLX5_SET(set_pp_rate_limit_context, rl_raw, rate_limit, rl->rate); + MLX5_SET(set_pp_rate_limit_context, rl_raw, burst_upper_bound, + rl->max_burst_sz); + MLX5_SET(set_pp_rate_limit_context, rl_raw, typical_packet_size, + rl->typical_pkt_sz); + + mutex_lock(&table->rl_lock); + entry = find_rl_entry(table, rl_raw, + MLX5_CAP_QOS(dev, packet_pacing_uid) ? + MLX5_SHARED_RESOURCE_UID : 0, false); + if (!entry || !entry->refcount) { + mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u are not configured\n", + rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); + goto out; + } + mlx5_rl_entry_put(dev, entry); + mlx5_rl_table_put(table); +out: + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate); + +int mlx5_init_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) { + table->max_size = 0; + return 0; + } + + mutex_init(&table->rl_lock); + + /* First entry is reserved for unlimited rate */ + table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1; + table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate); + table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate); + + mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n", + table->max_size, + table->min_rate >> 10, + table->max_rate >> 10); + + return 0; +} + +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) + return; + + mlx5_rl_table_free(dev, table); + mutex_destroy(&table->rl_lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c new file mode 100644 index 0000000000..a8d75c2f02 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include "priv.h" + +int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_sf_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_sf_in)] = {}; + + MLX5_SET(alloc_sf_in, in, opcode, MLX5_CMD_OP_ALLOC_SF); + MLX5_SET(alloc_sf_in, in, function_id, function_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_sf_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_sf_in)] = {}; + + MLX5_SET(dealloc_sf_in, in, opcode, MLX5_CMD_OP_DEALLOC_SF); + MLX5_SET(dealloc_sf_in, in, function_id, function_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c new file mode 100644 index 0000000000..05e148db98 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include +#include "mlx5_core.h" +#include "dev.h" +#include "sf/vhca_event.h" +#include "sf/sf.h" +#include "sf/mlx5_ifc_vhca_event.h" +#include "ecpf.h" +#define CREATE_TRACE_POINTS +#include "diag/dev_tracepoint.h" + +struct mlx5_sf_dev_table { + struct xarray devices; + unsigned int max_sfs; + phys_addr_t base_address; + u64 sf_bar_length; + struct notifier_block nb; + struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */ + struct workqueue_struct *active_wq; + struct work_struct work; + u8 stop_active_wq:1; + struct mlx5_core_dev *dev; +}; + +static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev); +} + +bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + return table && !xa_empty(&table->devices); +} + +static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev); + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + return sysfs_emit(buf, "%u\n", sf_dev->sfnum); +} +static DEVICE_ATTR_RO(sfnum); + +static struct attribute *sf_device_attrs[] = { + &dev_attr_sfnum.attr, + NULL, +}; + +static const struct attribute_group sf_attr_group = { + .attrs = sf_device_attrs, +}; + +static const struct attribute_group *sf_attr_groups[2] = { + &sf_attr_group, + NULL +}; + +static void mlx5_sf_dev_release(struct device *device) +{ + struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev); + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + mlx5_adev_idx_free(adev->id); + kfree(sf_dev); +} + +static void mlx5_sf_dev_remove(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev) +{ + int id; + + id = sf_dev->adev.id; + trace_mlx5_sf_dev_del(dev, sf_dev, id); + + auxiliary_device_delete(&sf_dev->adev); + auxiliary_device_uninit(&sf_dev->adev); +} + +static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + struct mlx5_sf_dev *sf_dev; + struct pci_dev *pdev; + int err; + int id; + + id = mlx5_adev_idx_alloc(); + if (id < 0) { + err = id; + goto add_err; + } + + sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL); + if (!sf_dev) { + mlx5_adev_idx_free(id); + err = -ENOMEM; + goto add_err; + } + pdev = dev->pdev; + sf_dev->adev.id = id; + sf_dev->adev.name = MLX5_SF_DEV_ID_NAME; + sf_dev->adev.dev.release = mlx5_sf_dev_release; + sf_dev->adev.dev.parent = &pdev->dev; + sf_dev->adev.dev.groups = sf_attr_groups; + sf_dev->sfnum = sfnum; + sf_dev->parent_mdev = dev; + sf_dev->fn_id = fn_id; + + if (!table->max_sfs) { + mlx5_adev_idx_free(id); + kfree(sf_dev); + err = -EOPNOTSUPP; + goto add_err; + } + sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length); + + trace_mlx5_sf_dev_add(dev, sf_dev, id); + + err = auxiliary_device_init(&sf_dev->adev); + if (err) { + mlx5_adev_idx_free(id); + kfree(sf_dev); + goto add_err; + } + + err = auxiliary_device_add(&sf_dev->adev); + if (err) { + auxiliary_device_uninit(&sf_dev->adev); + goto add_err; + } + + err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL); + if (err) + goto xa_err; + return; + +xa_err: + mlx5_sf_dev_remove(dev, sf_dev); +add_err: + mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n", + sf_index, sfnum, err); +} + +static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + xa_erase(&table->devices, sf_index); + mlx5_sf_dev_remove(dev, sf_dev); +} + +static int +mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data) +{ + struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb); + const struct mlx5_vhca_state_event *event = data; + struct mlx5_sf_dev *sf_dev; + u16 max_functions; + u16 sf_index; + u16 base_id; + + max_functions = mlx5_sf_max_functions(table->dev); + if (!max_functions) + return 0; + + base_id = mlx5_sf_start_function_id(table->dev); + if (event->function_id < base_id || event->function_id >= (base_id + max_functions)) + return 0; + + sf_index = event->function_id - base_id; + mutex_lock(&table->table_lock); + sf_dev = xa_load(&table->devices, sf_index); + switch (event->new_vhca_state) { + case MLX5_VHCA_STATE_INVALID: + case MLX5_VHCA_STATE_ALLOCATED: + if (sf_dev) + mlx5_sf_dev_del(table->dev, sf_dev, sf_index); + break; + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + if (sf_dev) + mlx5_sf_dev_del(table->dev, sf_dev, sf_index); + else + mlx5_core_err(table->dev, + "SF DEV: teardown state for invalid dev index=%d sfnum=0x%x\n", + sf_index, event->sw_function_id); + break; + case MLX5_VHCA_STATE_ACTIVE: + if (!sf_dev) + mlx5_sf_dev_add(table->dev, sf_index, event->function_id, + event->sw_function_id); + break; + default: + break; + } + mutex_unlock(&table->table_lock); + return 0; +} + +static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) +{ + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + int err = 0; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = mlx5_sf_start_function_id(dev); + /* Arm the vhca context as the vhca event notifier */ + for (i = 0; i < max_functions; i++) { + err = mlx5_vhca_event_arm(dev, function_id); + if (err) + return err; + + function_id++; + } + return 0; +} + +static void mlx5_sf_dev_add_active_work(struct work_struct *work) +{ + struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work); + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + u16 sw_func_id; + int err = 0; + u8 state; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = mlx5_sf_start_function_id(dev); + for (i = 0; i < max_functions; i++, function_id++) { + if (table->stop_active_wq) + return; + err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)); + if (err) + /* A failure of specific vhca doesn't mean others will + * fail as well. + */ + continue; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state != MLX5_VHCA_STATE_ACTIVE) + continue; + + sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id); + mutex_lock(&table->table_lock); + /* Don't probe device which is already probe */ + if (!xa_load(&table->devices, i)) + mlx5_sf_dev_add(dev, i, function_id, sw_func_id); + /* There is a race where SF got inactive after the query + * above. e.g.: the query returns that the state of the + * SF is active, and after that the eswitch manager set it to + * inactive. + * This case cannot be managed in SW, since the probing of the + * SF is on one system, and the inactivation is on a different + * system. + * If the inactive is done after the SF perform init_hca(), + * the SF will fully probe and then removed. If it was + * done before init_hca(), the SF probe will fail. + */ + mutex_unlock(&table->table_lock); + } +} + +/* In case SFs are generated externally, probe active SFs */ +static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table) +{ + if (MLX5_CAP_GEN(table->dev, eswitch_manager)) + return 0; /* the table is local */ + + /* Use a workqueue to probe active SFs, which are in large + * quantity and may take up to minutes to probe. + */ + table->active_wq = create_singlethread_workqueue("mlx5_active_sf"); + if (!table->active_wq) + return -ENOMEM; + INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work); + queue_work(table->active_wq, &table->work); + return 0; +} + +static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table) +{ + if (table->active_wq) { + table->stop_active_wq = true; + destroy_workqueue(table->active_wq); + } +} + +void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table; + unsigned int max_sfs; + int err; + + if (!mlx5_sf_dev_supported(dev)) + return; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) { + err = -ENOMEM; + goto table_err; + } + + table->nb.notifier_call = mlx5_sf_dev_state_change_handler; + table->dev = dev; + if (MLX5_CAP_GEN(dev, max_num_sf)) + max_sfs = MLX5_CAP_GEN(dev, max_num_sf); + else + max_sfs = 1 << MLX5_CAP_GEN(dev, log_max_sf); + table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12); + table->base_address = pci_resource_start(dev->pdev, 2); + table->max_sfs = max_sfs; + xa_init(&table->devices); + mutex_init(&table->table_lock); + dev->priv.sf_dev_table = table; + + err = mlx5_vhca_event_notifier_register(dev, &table->nb); + if (err) + goto vhca_err; + + err = mlx5_sf_dev_queue_active_work(table); + if (err) + goto add_active_err; + + err = mlx5_sf_dev_vhca_arm_all(table); + if (err) + goto arm_err; + mlx5_core_dbg(dev, "SF DEV: max sf devices=%d\n", max_sfs); + return; + +arm_err: + mlx5_sf_dev_destroy_active_work(table); +add_active_err: + mlx5_vhca_event_notifier_unregister(dev, &table->nb); +vhca_err: + table->max_sfs = 0; + kfree(table); + dev->priv.sf_dev_table = NULL; +table_err: + mlx5_core_err(dev, "SF DEV table create err = %d\n", err); +} + +static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table) +{ + struct mlx5_sf_dev *sf_dev; + unsigned long index; + + xa_for_each(&table->devices, index, sf_dev) { + xa_erase(&table->devices, index); + mlx5_sf_dev_remove(table->dev, sf_dev); + } +} + +void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + if (!table) + return; + + mlx5_sf_dev_destroy_active_work(table); + mlx5_vhca_event_notifier_unregister(dev, &table->nb); + mutex_destroy(&table->table_lock); + + /* Now that event handler is not running, it is safe to destroy + * the sf device without race. + */ + mlx5_sf_dev_destroy_all(table); + + WARN_ON(!xa_empty(&table->devices)); + kfree(table); + dev->priv.sf_dev_table = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h new file mode 100644 index 0000000000..2a66a427ef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_DEV_H__ +#define __MLX5_SF_DEV_H__ + +#ifdef CONFIG_MLX5_SF + +#include + +#define MLX5_SF_DEV_ID_NAME "sf" + +struct mlx5_sf_dev { + struct auxiliary_device adev; + struct mlx5_core_dev *parent_mdev; + struct mlx5_core_dev *mdev; + phys_addr_t bar_base_addr; + u32 sfnum; + u16 fn_id; +}; + +void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev); +void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_sf_driver_register(void); +void mlx5_sf_driver_unregister(void); + +bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev); + +#else + +static inline void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) +{ +} + +static inline void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_sf_driver_register(void) +{ + return 0; +} + +static inline void mlx5_sf_driver_unregister(void) +{ +} + +static inline bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) +{ + return false; +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h new file mode 100644 index 0000000000..7f7c9af5de --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_SF_DEV_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_SF_DEV_TP_ + +#include +#include +#include "../../dev/dev.h" + +DECLARE_EVENT_CLASS(mlx5_sf_dev_template, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_sf_dev *sfdev, + int aux_id), + TP_ARGS(dev, sfdev, aux_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const struct mlx5_sf_dev*, sfdev) + __field(int, aux_id) + __field(u16, hw_fn_id) + __field(u32, sfnum) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->sfdev = sfdev; + __entry->aux_id = aux_id; + __entry->hw_fn_id = sfdev->fn_id; + __entry->sfnum = sfdev->sfnum; + ), + TP_printk("(%s) sfdev=%pK aux_id=%d hw_id=0x%x sfnum=%u\n", + __get_str(devname), __entry->sfdev, + __entry->aux_id, __entry->hw_fn_id, + __entry->sfnum) +); + +DEFINE_EVENT(mlx5_sf_dev_template, mlx5_sf_dev_add, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_sf_dev *sfdev, + int aux_id), + TP_ARGS(dev, sfdev, aux_id) + ); + +DEFINE_EVENT(mlx5_sf_dev_template, mlx5_sf_dev_del, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_sf_dev *sfdev, + int aux_id), + TP_ARGS(dev, sfdev, aux_id) + ); + +#endif /* _MLX5_SF_DEV_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH sf/dev/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE dev_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c new file mode 100644 index 0000000000..8fe82f1191 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include +#include +#include "mlx5_core.h" +#include "dev.h" +#include "devlink.h" + +static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + struct mlx5_core_dev *mdev; + struct devlink *devlink; + int err; + + devlink = mlx5_devlink_alloc(&adev->dev); + if (!devlink) + return -ENOMEM; + + mdev = devlink_priv(devlink); + mdev->device = &adev->dev; + mdev->pdev = sf_dev->parent_mdev->pdev; + mdev->bar_addr = sf_dev->bar_base_addr; + mdev->iseg_base = sf_dev->bar_base_addr; + mdev->coredev_type = MLX5_COREDEV_SF; + mdev->priv.parent_mdev = sf_dev->parent_mdev; + mdev->priv.adev_idx = adev->id; + sf_dev->mdev = mdev; + + /* Only local SFs do light probe */ + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + mlx5_dev_set_lightweight(mdev); + + err = mlx5_mdev_init(mdev, MLX5_SF_PROF); + if (err) { + mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); + goto mdev_err; + } + + mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg)); + if (!mdev->iseg) { + mlx5_core_warn(mdev, "remap error\n"); + err = -ENOMEM; + goto remap_err; + } + + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + err = mlx5_init_one_light(mdev); + else + err = mlx5_init_one(mdev); + if (err) { + mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); + goto init_one_err; + } + devlink_register(devlink); + return 0; + +init_one_err: + iounmap(mdev->iseg); +remap_err: + mlx5_mdev_uninit(mdev); +mdev_err: + mlx5_devlink_free(devlink); + return err; +} + +static void mlx5_sf_dev_remove(struct auxiliary_device *adev) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + struct devlink *devlink = priv_to_devlink(sf_dev->mdev); + + mlx5_drain_health_wq(sf_dev->mdev); + devlink_unregister(devlink); + if (mlx5_dev_is_lightweight(sf_dev->mdev)) + mlx5_uninit_one_light(sf_dev->mdev); + else + mlx5_uninit_one(sf_dev->mdev); + iounmap(sf_dev->mdev->iseg); + mlx5_mdev_uninit(sf_dev->mdev); + mlx5_devlink_free(devlink); +} + +static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + mlx5_unload_one(sf_dev->mdev, false); +} + +static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = { + { .name = MLX5_ADEV_NAME "." MLX5_SF_DEV_ID_NAME, }, + { }, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5_sf_dev_id_table); + +static struct auxiliary_driver mlx5_sf_driver = { + .name = MLX5_SF_DEV_ID_NAME, + .probe = mlx5_sf_dev_probe, + .remove = mlx5_sf_dev_remove, + .shutdown = mlx5_sf_dev_shutdown, + .id_table = mlx5_sf_dev_id_table, +}; + +int mlx5_sf_driver_register(void) +{ + return auxiliary_driver_register(&mlx5_sf_driver); +} + +void mlx5_sf_driver_unregister(void) +{ + auxiliary_driver_unregister(&mlx5_sf_driver); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c new file mode 100644 index 0000000000..e34a8f88c5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -0,0 +1,571 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include "eswitch.h" +#include "priv.h" +#include "sf/dev/dev.h" +#include "mlx5_ifc_vhca_event.h" +#include "vhca_event.h" +#include "ecpf.h" +#define CREATE_TRACE_POINTS +#include "diag/sf_tracepoint.h" + +struct mlx5_sf { + struct mlx5_devlink_port dl_port; + unsigned int port_index; + u32 controller; + u16 id; + u16 hw_fn_id; + u16 hw_state; +}; + +struct mlx5_sf_table { + struct mlx5_core_dev *dev; /* To refer from notifier context. */ + struct xarray port_indices; /* port index based lookup. */ + refcount_t refcount; + struct completion disable_complete; + struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. */ + struct notifier_block esw_nb; + struct notifier_block vhca_nb; +}; + +static struct mlx5_sf * +mlx5_sf_lookup_by_index(struct mlx5_sf_table *table, unsigned int port_index) +{ + return xa_load(&table->port_indices, port_index); +} + +static struct mlx5_sf * +mlx5_sf_lookup_by_function_id(struct mlx5_sf_table *table, unsigned int fn_id) +{ + unsigned long index; + struct mlx5_sf *sf; + + xa_for_each(&table->port_indices, index, sf) { + if (sf->hw_fn_id == fn_id) + return sf; + } + return NULL; +} + +static int mlx5_sf_id_insert(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + return xa_insert(&table->port_indices, sf->port_index, sf, GFP_KERNEL); +} + +static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + xa_erase(&table->port_indices, sf->port_index); +} + +static struct mlx5_sf * +mlx5_sf_alloc(struct mlx5_sf_table *table, struct mlx5_eswitch *esw, + u32 controller, u32 sfnum, struct netlink_ext_ack *extack) +{ + unsigned int dl_port_index; + struct mlx5_sf *sf; + u16 hw_fn_id; + int id_err; + int err; + + if (!mlx5_esw_offloads_controller_valid(esw, controller)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid controller number"); + return ERR_PTR(-EINVAL); + } + + id_err = mlx5_sf_hw_table_sf_alloc(table->dev, controller, sfnum); + if (id_err < 0) { + err = id_err; + goto id_err; + } + + sf = kzalloc(sizeof(*sf), GFP_KERNEL); + if (!sf) { + err = -ENOMEM; + goto alloc_err; + } + sf->id = id_err; + hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, controller, sf->id); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id); + sf->port_index = dl_port_index; + sf->hw_fn_id = hw_fn_id; + sf->hw_state = MLX5_VHCA_STATE_ALLOCATED; + sf->controller = controller; + + err = mlx5_sf_id_insert(table, sf); + if (err) + goto insert_err; + + return sf; + +insert_err: + kfree(sf); +alloc_err: + mlx5_sf_hw_table_sf_free(table->dev, controller, id_err); +id_err: + if (err == -EEXIST) + NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum"); + return ERR_PTR(err); +} + +static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + mlx5_sf_id_erase(table, sf); + mlx5_sf_hw_table_sf_free(table->dev, sf->controller, sf->id); + trace_mlx5_sf_free(table->dev, sf->port_index, sf->controller, sf->hw_fn_id); + kfree(sf); +} + +static struct mlx5_sf_table *mlx5_sf_table_try_get(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return NULL; + + return refcount_inc_not_zero(&table->refcount) ? table : NULL; +} + +static void mlx5_sf_table_put(struct mlx5_sf_table *table) +{ + if (refcount_dec_and_test(&table->refcount)) + complete(&table->disable_complete); +} + +static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state) +{ + switch (hw_state) { + case MLX5_VHCA_STATE_ACTIVE: + case MLX5_VHCA_STATE_IN_USE: + return DEVLINK_PORT_FN_STATE_ACTIVE; + case MLX5_VHCA_STATE_INVALID: + case MLX5_VHCA_STATE_ALLOCATED: + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + default: + return DEVLINK_PORT_FN_STATE_INACTIVE; + } +} + +static enum devlink_port_fn_opstate mlx5_sf_to_devlink_opstate(u8 hw_state) +{ + switch (hw_state) { + case MLX5_VHCA_STATE_IN_USE: + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + return DEVLINK_PORT_FN_OPSTATE_ATTACHED; + case MLX5_VHCA_STATE_INVALID: + case MLX5_VHCA_STATE_ALLOCATED: + case MLX5_VHCA_STATE_ACTIVE: + default: + return DEVLINK_PORT_FN_OPSTATE_DETACHED; + } +} + +static bool mlx5_sf_is_active(const struct mlx5_sf *sf) +{ + return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE; +} + +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err = 0; + + table = mlx5_sf_table_try_get(dev); + if (!table) + return -EOPNOTSUPP; + + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) { + err = -EOPNOTSUPP; + goto sf_err; + } + mutex_lock(&table->sf_state_lock); + *state = mlx5_sf_to_devlink_state(sf->hw_state); + *opstate = mlx5_sf_to_devlink_opstate(sf->hw_state); + mutex_unlock(&table->sf_state_lock); +sf_err: + mlx5_sf_table_put(table); + return err; +} + +static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf, + struct netlink_ext_ack *extack) +{ + int err; + + if (mlx5_sf_is_active(sf)) + return 0; + if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) { + NL_SET_ERR_MSG_MOD(extack, "SF is inactivated but it is still attached"); + return -EBUSY; + } + + err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id); + if (err) + return err; + + sf->hw_state = MLX5_VHCA_STATE_ACTIVE; + trace_mlx5_sf_activate(dev, sf->port_index, sf->controller, sf->hw_fn_id); + return 0; +} + +static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) +{ + int err; + + if (!mlx5_sf_is_active(sf)) + return 0; + + err = mlx5_cmd_sf_disable_hca(dev, sf->hw_fn_id); + if (err) + return err; + + sf->hw_state = MLX5_VHCA_STATE_TEARDOWN_REQUEST; + trace_mlx5_sf_deactivate(dev, sf->port_index, sf->controller, sf->hw_fn_id); + return 0; +} + +static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, + struct mlx5_sf *sf, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) +{ + int err = 0; + + mutex_lock(&table->sf_state_lock); + if (state == mlx5_sf_to_devlink_state(sf->hw_state)) + goto out; + if (state == DEVLINK_PORT_FN_STATE_ACTIVE) + err = mlx5_sf_activate(dev, sf, extack); + else if (state == DEVLINK_PORT_FN_STATE_INACTIVE) + err = mlx5_sf_deactivate(dev, sf); + else + err = -EINVAL; +out: + mutex_unlock(&table->sf_state_lock); + return err; +} + +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port state set is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) { + err = -ENODEV; + goto out; + } + + err = mlx5_sf_state_set(dev, table, sf, state, extack); +out: + mlx5_sf_table_put(table); + return err; +} + +static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + struct devlink_port **dl_port) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_sf *sf; + int err; + + sf = mlx5_sf_alloc(table, esw, new_attr->controller, new_attr->sfnum, extack); + if (IS_ERR(sf)) + return PTR_ERR(sf); + + err = mlx5_eswitch_load_sf_vport(esw, sf->hw_fn_id, MLX5_VPORT_UC_ADDR_CHANGE, + &sf->dl_port, new_attr->controller, new_attr->sfnum); + if (err) + goto esw_err; + *dl_port = &sf->dl_port.dl_port; + trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum); + return 0; + +esw_err: + mlx5_sf_free(table, sf); + return err; +} + +static int +mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack) +{ + if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) { + NL_SET_ERR_MSG_MOD(extack, "Driver supports only SF port addition"); + return -EOPNOTSUPP; + } + if (new_attr->port_index_valid) { + NL_SET_ERR_MSG_MOD(extack, + "Driver does not support user defined port index assignment"); + return -EOPNOTSUPP; + } + if (!new_attr->sfnum_valid) { + NL_SET_ERR_MSG_MOD(extack, + "User must provide unique sfnum. Driver does not support auto assignment"); + return -EOPNOTSUPP; + } + if (new_attr->controller_valid && new_attr->controller && + !mlx5_core_is_ecpf_esw_manager(dev)) { + NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported"); + return -EOPNOTSUPP; + } + if (new_attr->pfnum != mlx5_get_dev_index(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied"); + return -EOPNOTSUPP; + } + return 0; +} + +int mlx5_devlink_sf_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + struct devlink_port **dl_port) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + int err; + + err = mlx5_sf_new_check_attr(dev, new_attr, extack); + if (err) + return err; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port add is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + err = mlx5_sf_add(dev, table, new_attr, extack, dl_port); + mlx5_sf_table_put(table); + return err; +} + +static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + if (sf->hw_state == MLX5_VHCA_STATE_ALLOCATED) { + mlx5_sf_free(table, sf); + } else if (mlx5_sf_is_active(sf)) { + /* Even if its active, it is treated as in_use because by the time, + * it is disabled here, it may getting used. So it is safe to + * always look for the event to ensure that it is recycled only after + * firmware gives confirmation that it is detached by the driver. + */ + mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id); + mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id); + kfree(sf); + } else { + mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id); + kfree(sf); + } +} + +int mlx5_devlink_sf_port_del(struct devlink *devlink, + struct devlink_port *dl_port, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err = 0; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port del is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) { + err = -ENODEV; + goto sf_err; + } + + mlx5_eswitch_unload_sf_vport(esw, sf->hw_fn_id); + mlx5_sf_id_erase(table, sf); + + mutex_lock(&table->sf_state_lock); + mlx5_sf_dealloc(table, sf); + mutex_unlock(&table->sf_state_lock); +sf_err: + mlx5_sf_table_put(table); + return err; +} + +static bool mlx5_sf_state_update_check(const struct mlx5_sf *sf, u8 new_state) +{ + if (sf->hw_state == MLX5_VHCA_STATE_ACTIVE && new_state == MLX5_VHCA_STATE_IN_USE) + return true; + + if (sf->hw_state == MLX5_VHCA_STATE_IN_USE && new_state == MLX5_VHCA_STATE_ACTIVE) + return true; + + if (sf->hw_state == MLX5_VHCA_STATE_TEARDOWN_REQUEST && + new_state == MLX5_VHCA_STATE_ALLOCATED) + return true; + + return false; +} + +static int mlx5_sf_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, vhca_nb); + const struct mlx5_vhca_state_event *event = data; + bool update = false; + struct mlx5_sf *sf; + + table = mlx5_sf_table_try_get(table->dev); + if (!table) + return 0; + + mutex_lock(&table->sf_state_lock); + sf = mlx5_sf_lookup_by_function_id(table, event->function_id); + if (!sf) + goto sf_err; + + /* When driver is attached or detached to a function, an event + * notifies such state change. + */ + update = mlx5_sf_state_update_check(sf, event->new_vhca_state); + if (update) + sf->hw_state = event->new_vhca_state; + trace_mlx5_sf_update_state(table->dev, sf->port_index, sf->controller, + sf->hw_fn_id, sf->hw_state); +sf_err: + mutex_unlock(&table->sf_state_lock); + mlx5_sf_table_put(table); + return 0; +} + +static void mlx5_sf_table_enable(struct mlx5_sf_table *table) +{ + init_completion(&table->disable_complete); + refcount_set(&table->refcount, 1); +} + +static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table) +{ + struct mlx5_eswitch *esw = table->dev->priv.eswitch; + unsigned long index; + struct mlx5_sf *sf; + + /* At this point, no new user commands can start and no vhca event can + * arrive. It is safe to destroy all user created SFs. + */ + xa_for_each(&table->port_indices, index, sf) { + mlx5_eswitch_unload_sf_vport(esw, sf->hw_fn_id); + mlx5_sf_id_erase(table, sf); + mlx5_sf_dealloc(table, sf); + } +} + +static void mlx5_sf_table_disable(struct mlx5_sf_table *table) +{ + if (!refcount_read(&table->refcount)) + return; + + /* Balances with refcount_set; drop the reference so that new user cmd cannot start + * and new vhca event handler cannot run. + */ + mlx5_sf_table_put(table); + wait_for_completion(&table->disable_complete); + + mlx5_sf_deactivate_all(table); +} + +static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, esw_nb); + const struct mlx5_esw_event_info *mode = data; + + switch (mode->new_mode) { + case MLX5_ESWITCH_OFFLOADS: + mlx5_sf_table_enable(table); + break; + case MLX5_ESWITCH_LEGACY: + mlx5_sf_table_disable(table); + break; + default: + break; + } + + return 0; +} + +static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev) +{ + return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && + mlx5_sf_hw_table_supported(dev); +} + +int mlx5_sf_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table; + int err; + + if (!mlx5_sf_table_supported(dev) || !mlx5_vhca_event_supported(dev)) + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + mutex_init(&table->sf_state_lock); + table->dev = dev; + xa_init(&table->port_indices); + dev->priv.sf_table = table; + refcount_set(&table->refcount, 0); + table->esw_nb.notifier_call = mlx5_sf_esw_event; + err = mlx5_esw_event_notifier_register(dev->priv.eswitch, &table->esw_nb); + if (err) + goto reg_err; + + table->vhca_nb.notifier_call = mlx5_sf_vhca_event; + err = mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb); + if (err) + goto vhca_err; + + return 0; + +vhca_err: + mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); +reg_err: + mutex_destroy(&table->sf_state_lock); + kfree(table); + dev->priv.sf_table = NULL; + return err; +} + +void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return; + + mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb); + mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); + WARN_ON(refcount_read(&table->refcount)); + mutex_destroy(&table->sf_state_lock); + WARN_ON(!xa_empty(&table->port_indices)); + kfree(table); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h new file mode 100644 index 0000000000..8bf1cd9093 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/sf_tracepoint.h @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_SF_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_SF_TP_ + +#include +#include +#include "sf/vhca_event.h" + +TRACE_EVENT(mlx5_sf_add, + TP_PROTO(const struct mlx5_core_dev *dev, + unsigned int port_index, + u32 controller, + u16 hw_fn_id, + u32 sfnum), + TP_ARGS(dev, port_index, controller, hw_fn_id, sfnum), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(unsigned int, port_index) + __field(u32, controller) + __field(u16, hw_fn_id) + __field(u32, sfnum) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->port_index = port_index; + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + __entry->sfnum = sfnum; + ), + TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x sfnum=%u\n", + __get_str(devname), __entry->port_index, __entry->controller, + __entry->hw_fn_id, __entry->sfnum) +); + +TRACE_EVENT(mlx5_sf_free, + TP_PROTO(const struct mlx5_core_dev *dev, + unsigned int port_index, + u32 controller, + u16 hw_fn_id), + TP_ARGS(dev, port_index, controller, hw_fn_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(unsigned int, port_index) + __field(u32, controller) + __field(u16, hw_fn_id) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->port_index = port_index; + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + ), + TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x\n", + __get_str(devname), __entry->port_index, __entry->controller, + __entry->hw_fn_id) +); + +TRACE_EVENT(mlx5_sf_hwc_alloc, + TP_PROTO(const struct mlx5_core_dev *dev, + u32 controller, + u16 hw_fn_id, + u32 sfnum), + TP_ARGS(dev, controller, hw_fn_id, sfnum), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(u32, controller) + __field(u16, hw_fn_id) + __field(u32, sfnum) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + __entry->sfnum = sfnum; + ), + TP_printk("(%s) controller=%u hw_id=0x%x sfnum=%u\n", + __get_str(devname), __entry->controller, __entry->hw_fn_id, + __entry->sfnum) +); + +TRACE_EVENT(mlx5_sf_hwc_free, + TP_PROTO(const struct mlx5_core_dev *dev, + u16 hw_fn_id), + TP_ARGS(dev, hw_fn_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(u16, hw_fn_id) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->hw_fn_id = hw_fn_id; + ), + TP_printk("(%s) hw_id=0x%x\n", __get_str(devname), __entry->hw_fn_id) +); + +TRACE_EVENT(mlx5_sf_hwc_deferred_free, + TP_PROTO(const struct mlx5_core_dev *dev, + u16 hw_fn_id), + TP_ARGS(dev, hw_fn_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(u16, hw_fn_id) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->hw_fn_id = hw_fn_id; + ), + TP_printk("(%s) hw_id=0x%x\n", __get_str(devname), __entry->hw_fn_id) +); + +DECLARE_EVENT_CLASS(mlx5_sf_state_template, + TP_PROTO(const struct mlx5_core_dev *dev, + u32 port_index, + u32 controller, + u16 hw_fn_id), + TP_ARGS(dev, port_index, controller, hw_fn_id), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(unsigned int, port_index) + __field(u32, controller) + __field(u16, hw_fn_id)), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->port_index = port_index; + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + ), + TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x\n", + __get_str(devname), __entry->port_index, __entry->controller, + __entry->hw_fn_id) +); + +DEFINE_EVENT(mlx5_sf_state_template, mlx5_sf_activate, + TP_PROTO(const struct mlx5_core_dev *dev, + u32 port_index, + u32 controller, + u16 hw_fn_id), + TP_ARGS(dev, port_index, controller, hw_fn_id) + ); + +DEFINE_EVENT(mlx5_sf_state_template, mlx5_sf_deactivate, + TP_PROTO(const struct mlx5_core_dev *dev, + u32 port_index, + u32 controller, + u16 hw_fn_id), + TP_ARGS(dev, port_index, controller, hw_fn_id) + ); + +TRACE_EVENT(mlx5_sf_update_state, + TP_PROTO(const struct mlx5_core_dev *dev, + unsigned int port_index, + u32 controller, + u16 hw_fn_id, + u8 state), + TP_ARGS(dev, port_index, controller, hw_fn_id, state), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(unsigned int, port_index) + __field(u32, controller) + __field(u16, hw_fn_id) + __field(u8, state) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->port_index = port_index; + __entry->controller = controller; + __entry->hw_fn_id = hw_fn_id; + __entry->state = state; + ), + TP_printk("(%s) port_index=%u controller=%u hw_id=0x%x state=%u\n", + __get_str(devname), __entry->port_index, __entry->controller, + __entry->hw_fn_id, __entry->state) +); + +#endif /* _MLX5_SF_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH sf/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE sf_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h new file mode 100644 index 0000000000..fd814a190b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/diag/vhca_tracepoint.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_SF_VHCA_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_SF_VHCA_TP_ + +#include +#include +#include "sf/vhca_event.h" + +TRACE_EVENT(mlx5_sf_vhca_event, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_vhca_state_event *event), + TP_ARGS(dev, event), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(u16, hw_fn_id) + __field(u32, sfnum) + __field(u8, vhca_state) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->hw_fn_id = event->function_id; + __entry->sfnum = event->sw_function_id; + __entry->vhca_state = event->new_vhca_state; + ), + TP_printk("(%s) hw_id=0x%x sfnum=%u vhca_state=%d\n", + __get_str(devname), __entry->hw_fn_id, + __entry->sfnum, __entry->vhca_state) +); + +#endif /* _MLX5_SF_VHCA_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH sf/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE vhca_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c new file mode 100644 index 0000000000..1f613320fe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ +#include +#include "vhca_event.h" +#include "priv.h" +#include "sf.h" +#include "mlx5_ifc_vhca_event.h" +#include "ecpf.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "diag/sf_tracepoint.h" +#include "devlink.h" + +struct mlx5_sf_hw { + u32 usr_sfnum; + u8 allocated: 1; + u8 pending_delete: 1; +}; + +struct mlx5_sf_hwc_table { + struct mlx5_sf_hw *sfs; + int max_fn; + u16 start_fn_id; +}; + +enum mlx5_sf_hwc_index { + MLX5_SF_HWC_LOCAL, + MLX5_SF_HWC_EXTERNAL, + MLX5_SF_HWC_MAX, +}; + +struct mlx5_sf_hw_table { + struct mlx5_core_dev *dev; + struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */ + struct notifier_block vhca_nb; + struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX]; +}; + +static struct mlx5_sf_hwc_table * +mlx5_sf_controller_to_hwc(struct mlx5_core_dev *dev, u32 controller) +{ + int idx = !!controller; + + return &dev->priv.sf_hw_table->hwc[idx]; +} + +u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id) +{ + struct mlx5_sf_hwc_table *hwc; + + hwc = mlx5_sf_controller_to_hwc(dev, controller); + return hwc->start_fn_id + sw_id; +} + +static u16 mlx5_sf_hw_to_sw_id(struct mlx5_sf_hwc_table *hwc, u16 hw_id) +{ + return hw_id - hwc->start_fn_id; +} + +static struct mlx5_sf_hwc_table * +mlx5_sf_table_fn_to_hwc(struct mlx5_sf_hw_table *table, u16 fn_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(table->hwc); i++) { + if (table->hwc[i].max_fn && + fn_id >= table->hwc[i].start_fn_id && + fn_id < (table->hwc[i].start_fn_id + table->hwc[i].max_fn)) + return &table->hwc[i]; + } + return NULL; +} + +static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 controller, + u32 usr_sfnum) +{ + struct mlx5_sf_hwc_table *hwc; + int free_idx = -1; + int i; + + hwc = mlx5_sf_controller_to_hwc(table->dev, controller); + if (!hwc->sfs) + return -ENOSPC; + + for (i = 0; i < hwc->max_fn; i++) { + if (!hwc->sfs[i].allocated && free_idx == -1) { + free_idx = i; + continue; + } + + if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum) + return -EEXIST; + } + + if (free_idx == -1) + return -ENOSPC; + + hwc->sfs[free_idx].usr_sfnum = usr_sfnum; + hwc->sfs[free_idx].allocated = true; + return free_idx; +} + +static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id) +{ + struct mlx5_sf_hwc_table *hwc; + + hwc = mlx5_sf_controller_to_hwc(table->dev, controller); + hwc->sfs[id].allocated = false; + hwc->sfs[id].pending_delete = false; +} + +int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u16 hw_fn_id; + int sw_id; + int err; + + if (!table) + return -EOPNOTSUPP; + + mutex_lock(&table->table_lock); + sw_id = mlx5_sf_hw_table_id_alloc(table, controller, usr_sfnum); + if (sw_id < 0) { + err = sw_id; + goto exist_err; + } + + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, sw_id); + err = mlx5_cmd_alloc_sf(dev, hw_fn_id); + if (err) + goto err; + + err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, usr_sfnum); + if (err) + goto vhca_err; + + if (controller) { + /* If this SF is for external controller, SF manager + * needs to arm firmware to receive the events. + */ + err = mlx5_vhca_event_arm(dev, hw_fn_id); + if (err) + goto vhca_err; + } + + trace_mlx5_sf_hwc_alloc(dev, controller, hw_fn_id, usr_sfnum); + mutex_unlock(&table->table_lock); + return sw_id; + +vhca_err: + mlx5_cmd_dealloc_sf(dev, hw_fn_id); +err: + mlx5_sf_hw_table_id_free(table, controller, sw_id); +exist_err: + mutex_unlock(&table->table_lock); + return err; +} + +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u16 hw_fn_id; + + mutex_lock(&table->table_lock); + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id); + mlx5_cmd_dealloc_sf(dev, hw_fn_id); + mlx5_sf_hw_table_id_free(table, controller, id); + mutex_unlock(&table->table_lock); +} + +static void mlx5_sf_hw_table_hwc_sf_free(struct mlx5_core_dev *dev, + struct mlx5_sf_hwc_table *hwc, int idx) +{ + mlx5_cmd_dealloc_sf(dev, hwc->start_fn_id + idx); + hwc->sfs[idx].allocated = false; + hwc->sfs[idx].pending_delete = false; + trace_mlx5_sf_hwc_free(dev, hwc->start_fn_id + idx); +} + +void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_sf_hwc_table *hwc; + u16 hw_fn_id; + u8 state; + int err; + + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id); + hwc = mlx5_sf_controller_to_hwc(dev, controller); + mutex_lock(&table->table_lock); + err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out)); + if (err) + goto err; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state == MLX5_VHCA_STATE_ALLOCATED) { + mlx5_cmd_dealloc_sf(dev, hw_fn_id); + hwc->sfs[id].allocated = false; + } else { + hwc->sfs[id].pending_delete = true; + trace_mlx5_sf_hwc_deferred_free(dev, hw_fn_id); + } +err: + mutex_unlock(&table->table_lock); +} + +static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev, + struct mlx5_sf_hwc_table *hwc) +{ + int i; + + for (i = 0; i < hwc->max_fn; i++) { + if (hwc->sfs[i].allocated) + mlx5_sf_hw_table_hwc_sf_free(dev, hwc, i); + } +} + +static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table) +{ + mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_EXTERNAL]); + mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]); +} + +static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn, u16 base_id) +{ + struct mlx5_sf_hw *sfs; + + if (!max_fn) + return 0; + + sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL); + if (!sfs) + return -ENOMEM; + + hwc->sfs = sfs; + hwc->max_fn = max_fn; + hwc->start_fn_id = base_id; + return 0; +} + +static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc) +{ + kfree(hwc->sfs); +} + +static void mlx5_sf_hw_table_res_unregister(struct mlx5_core_dev *dev) +{ + devl_resources_unregister(priv_to_devlink(dev)); +} + +static int mlx5_sf_hw_table_res_register(struct mlx5_core_dev *dev, u16 max_fn, + u16 max_ext_fn) +{ + struct devlink_resource_size_params size_params; + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devlink_resource_size_params_init(&size_params, max_fn, max_fn, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devl_resource_register(devlink, "max_local_SFs", max_fn, MLX5_DL_RES_MAX_LOCAL_SFS, + DEVLINK_RESOURCE_ID_PARENT_TOP, &size_params); + if (err) + return err; + + devlink_resource_size_params_init(&size_params, max_ext_fn, max_ext_fn, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + return devl_resource_register(devlink, "max_external_SFs", max_ext_fn, + MLX5_DL_RES_MAX_EXTERNAL_SFS, DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); +} + +int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table; + u16 max_ext_fn = 0; + u16 ext_base_id = 0; + u16 base_id; + u16 max_fn; + int err; + + if (!mlx5_vhca_event_supported(dev)) + return 0; + + max_fn = mlx5_sf_max_functions(dev); + + err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id); + if (err) + return err; + + if (mlx5_sf_hw_table_res_register(dev, max_fn, max_ext_fn)) + mlx5_core_dbg(dev, "failed to register max SFs resources"); + + if (!max_fn && !max_ext_fn) + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) { + err = -ENOMEM; + goto alloc_err; + } + + mutex_init(&table->table_lock); + table->dev = dev; + dev->priv.sf_hw_table = table; + + base_id = mlx5_sf_start_function_id(dev); + err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_LOCAL], max_fn, base_id); + if (err) + goto table_err; + + err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_EXTERNAL], + max_ext_fn, ext_base_id); + if (err) + goto ext_err; + + mlx5_core_dbg(dev, "SF HW table: max sfs = %d, ext sfs = %d\n", max_fn, max_ext_fn); + return 0; + +ext_err: + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]); +table_err: + mutex_destroy(&table->table_lock); + kfree(table); +alloc_err: + mlx5_sf_hw_table_res_unregister(dev); + return err; +} + +void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + goto res_unregister; + + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]); + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]); + mutex_destroy(&table->table_lock); + kfree(table); +res_unregister: + mlx5_sf_hw_table_res_unregister(dev); +} + +static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data) +{ + struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb); + const struct mlx5_vhca_state_event *event = data; + struct mlx5_sf_hwc_table *hwc; + struct mlx5_sf_hw *sf_hw; + u16 sw_id; + + if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED) + return 0; + + hwc = mlx5_sf_table_fn_to_hwc(table, event->function_id); + if (!hwc) + return 0; + + sw_id = mlx5_sf_hw_to_sw_id(hwc, event->function_id); + sf_hw = &hwc->sfs[sw_id]; + + mutex_lock(&table->table_lock); + /* SF driver notified through firmware that SF is finally detached. + * Hence recycle the sf hardware id for reuse. + */ + if (sf_hw->allocated && sf_hw->pending_delete) + mlx5_sf_hw_table_hwc_sf_free(table->dev, hwc, sw_id); + mutex_unlock(&table->table_lock); + return 0; +} + +int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + return 0; + + table->vhca_nb.notifier_call = mlx5_sf_hw_vhca_event; + return mlx5_vhca_event_notifier_register(dev, &table->vhca_nb); +} + +void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + return; + + mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb); + /* Dealloc SFs whose firmware event has been missed. */ + mlx5_sf_hw_table_dealloc_all(table); +} + +bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev) +{ + return !!dev->priv.sf_hw_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h new file mode 100644 index 0000000000..4fc870140d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_IFC_VHCA_EVENT_H__ +#define __MLX5_IFC_VHCA_EVENT_H__ + +enum mlx5_ifc_vhca_state { + MLX5_VHCA_STATE_INVALID = 0x0, + MLX5_VHCA_STATE_ALLOCATED = 0x1, + MLX5_VHCA_STATE_ACTIVE = 0x2, + MLX5_VHCA_STATE_IN_USE = 0x3, + MLX5_VHCA_STATE_TEARDOWN_REQUEST = 0x4, +}; + +struct mlx5_ifc_vhca_state_context_bits { + u8 arm_change_event[0x1]; + u8 reserved_at_1[0xb]; + u8 vhca_state[0x4]; + u8 reserved_at_10[0x10]; + + u8 sw_function_id[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_query_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_vhca_state_context_bits vhca_state_context; +}; + +struct mlx5_ifc_query_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_vhca_state_field_select_bits { + u8 reserved_at_0[0x1e]; + u8 sw_function_id[0x1]; + u8 arm_change_event[0x1]; +}; + +struct mlx5_ifc_modify_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_modify_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + struct mlx5_ifc_vhca_state_field_select_bits vhca_state_field_select; + + struct mlx5_ifc_vhca_state_context_bits vhca_state_context; +}; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h new file mode 100644 index 0000000000..7114f3fc33 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_PRIV_H__ +#define __MLX5_SF_PRIV_H__ + +#include + +int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id); +int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id); + +int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id); + +u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id); + +int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum); +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id); +void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id); +bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h new file mode 100644 index 0000000000..860f9ddb71 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_H__ +#define __MLX5_SF_H__ + +#include +#include "lib/sf.h" + +#ifdef CONFIG_MLX5_SF_MANAGER + +int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev); +void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev); + +int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev); +void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_sf_table_init(struct mlx5_core_dev *dev); +void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); + +int mlx5_devlink_sf_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *add_attr, + struct netlink_ext_ack *extack, + struct devlink_port **dl_port); +int mlx5_devlink_sf_port_del(struct devlink *devlink, + struct devlink_port *dl_port, + struct netlink_ext_ack *extack); +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack); +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack); +#else + +static inline int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_sf_table_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) +{ +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c new file mode 100644 index 0000000000..d908fba968 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include "mlx5_ifc_vhca_event.h" +#include "mlx5_core.h" +#include "vhca_event.h" +#include "ecpf.h" +#define CREATE_TRACE_POINTS +#include "diag/vhca_tracepoint.h" + +struct mlx5_vhca_state_notifier { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct blocking_notifier_head n_head; +}; + +struct mlx5_vhca_event_work { + struct work_struct work; + struct mlx5_vhca_state_notifier *notifier; + struct mlx5_vhca_state_event event; +}; + +int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, u32 *out, u32 outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {}; + + MLX5_SET(query_vhca_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_STATE); + MLX5_SET(query_vhca_state_in, in, function_id, function_id); + MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, 0); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +static int mlx5_cmd_modify_vhca_state(struct mlx5_core_dev *dev, u16 function_id, + u32 *in, u32 inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {}; + + MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE); + MLX5_SET(modify_vhca_state_in, in, function_id, function_id); + MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0); + + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id) +{ + u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {}; + + MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE); + MLX5_SET(modify_vhca_state_in, in, function_id, function_id); + MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0); + MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.sw_function_id, 1); + MLX5_SET(modify_vhca_state_in, in, vhca_state_context.sw_function_id, sw_fn_id); + + return mlx5_cmd_exec_inout(dev, modify_vhca_state, in, out); +} + +int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {}; + + MLX5_SET(modify_vhca_state_in, in, vhca_state_context.arm_change_event, 1); + MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.arm_change_event, 1); + + return mlx5_cmd_modify_vhca_state(dev, function_id, in, sizeof(in)); +} + +static void +mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event *event) +{ + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + int err; + + err = mlx5_cmd_query_vhca_state(dev, event->function_id, out, sizeof(out)); + if (err) + return; + + event->sw_function_id = MLX5_GET(query_vhca_state_out, out, + vhca_state_context.sw_function_id); + event->new_vhca_state = MLX5_GET(query_vhca_state_out, out, + vhca_state_context.vhca_state); + + mlx5_vhca_event_arm(dev, event->function_id); + trace_mlx5_sf_vhca_event(dev, event); + + blocking_notifier_call_chain(&dev->priv.vhca_state_notifier->n_head, 0, event); +} + +static void mlx5_vhca_state_work_handler(struct work_struct *_work) +{ + struct mlx5_vhca_event_work *work = container_of(_work, struct mlx5_vhca_event_work, work); + struct mlx5_vhca_state_notifier *notifier = work->notifier; + struct mlx5_core_dev *dev = notifier->dev; + + mlx5_vhca_event_notify(dev, &work->event); + kfree(work); +} + +static int +mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_vhca_state_notifier *notifier = + mlx5_nb_cof(nb, struct mlx5_vhca_state_notifier, nb); + struct mlx5_vhca_event_work *work; + struct mlx5_eqe *eqe = data; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + INIT_WORK(&work->work, &mlx5_vhca_state_work_handler); + work->notifier = notifier; + work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id); + mlx5_events_work_enqueue(notifier->dev, &work->work); + return NOTIFY_OK; +} + +void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap) +{ + if (!mlx5_vhca_event_supported(dev)) + return; + + MLX5_SET(cmd_hca_cap, set_hca_cap, vhca_state, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_allocated, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_active, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_in_use, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_teardown_request, 1); +} + +int mlx5_vhca_event_init(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!mlx5_vhca_event_supported(dev)) + return 0; + + notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); + if (!notifier) + return -ENOMEM; + + dev->priv.vhca_state_notifier = notifier; + notifier->dev = dev; + BLOCKING_INIT_NOTIFIER_HEAD(¬ifier->n_head); + MLX5_NB_INIT(¬ifier->nb, mlx5_vhca_state_change_notifier, VHCA_STATE_CHANGE); + return 0; +} + +void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_vhca_event_supported(dev)) + return; + + kfree(dev->priv.vhca_state_notifier); + dev->priv.vhca_state_notifier = NULL; +} + +void mlx5_vhca_event_start(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!dev->priv.vhca_state_notifier) + return; + + notifier = dev->priv.vhca_state_notifier; + mlx5_eq_notifier_register(dev, ¬ifier->nb); +} + +void mlx5_vhca_event_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!dev->priv.vhca_state_notifier) + return; + + notifier = dev->priv.vhca_state_notifier; + mlx5_eq_notifier_unregister(dev, ¬ifier->nb); +} + +int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + if (!dev->priv.vhca_state_notifier) + return -EOPNOTSUPP; + return blocking_notifier_chain_register(&dev->priv.vhca_state_notifier->n_head, nb); +} + +void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&dev->priv.vhca_state_notifier->n_head, nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h new file mode 100644 index 0000000000..013cdfe906 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_VHCA_EVENT_H__ +#define __MLX5_VHCA_EVENT_H__ + +#ifdef CONFIG_MLX5_SF + +struct mlx5_vhca_state_event { + u16 function_id; + u16 sw_function_id; + u8 new_vhca_state; +}; + +static inline bool mlx5_vhca_event_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN_MAX(dev, vhca_state); +} + +void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap); +int mlx5_vhca_event_init(struct mlx5_core_dev *dev); +void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev); +void mlx5_vhca_event_start(struct mlx5_core_dev *dev); +void mlx5_vhca_event_stop(struct mlx5_core_dev *dev); +int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); +void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id); +int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id); +int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, + u32 *out, u32 outlen); +#else + +static inline void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap) +{ +} + +static inline int mlx5_vhca_event_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev) +{ +} + +static inline void mlx5_vhca_event_start(struct mlx5_core_dev *dev) +{ +} + +static inline void mlx5_vhca_event_stop(struct mlx5_core_dev *dev) +{ +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 0000000000..a2fc937d54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "eswitch.h" + +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf, u16 func_id) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct mlx5_hca_vport_context *in; + int err = 0; + + /* Restore sriov guid and policy settings */ + if (sriov->vfs_ctx[vf].node_guid || + sriov->vfs_ctx[vf].port_guid || + sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) { + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->node_guid = sriov->vfs_ctx[vf].node_guid; + in->port_guid = sriov->vfs_ctx[vf].port_guid; + in->policy = sriov->vfs_ctx[vf].policy; + in->field_select = + !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID | + !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | + !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; + + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, func_id, in); + if (err) + mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); + + kfree(in); + } + + return err; +} + +static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err, vf, num_msix_count; + int vport_num; + + err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); + if (err) { + mlx5_core_warn(dev, + "failed to enable eswitch SRIOV (%d)\n", err); + return err; + } + + num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs); + for (vf = 0; vf < num_vfs; vf++) { + /* Notify the VF before its enablement to let it set + * some stuff. + */ + blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier, + MLX5_PF_NOTIFY_ENABLE_VF, dev); + err = mlx5_core_enable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); + continue; + } + + err = mlx5_set_msix_vec_count(dev, vf + 1, num_msix_count); + if (err) { + mlx5_core_warn(dev, + "failed to set MSI-X vector counts VF %d, err %d\n", + vf, err); + continue; + } + + sriov->vfs_ctx[vf].enabled = 1; + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { + vport_num = mlx5_core_ec_sriov_enabled(dev) ? + mlx5_core_ec_vf_vport_base(dev) + vf + : vf + 1; + err = sriov_restore_guids(dev, vf, vport_num); + if (err) { + mlx5_core_warn(dev, + "failed to restore VF %d settings, err %d\n", + vf, err); + continue; + } + } + mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); + } + + return 0; +} + +static void +mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf, bool num_vf_change) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + bool wait_for_ec_vf_pages = true; + bool wait_for_vf_pages = true; + int err; + int vf; + + for (vf = num_vfs - 1; vf >= 0; vf--) { + if (!sriov->vfs_ctx[vf].enabled) + continue; + /* Notify the VF before its disablement to let it clean + * some resources. + */ + blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier, + MLX5_PF_NOTIFY_DISABLE_VF, dev); + err = mlx5_core_disable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to disable VF %d\n", vf); + continue; + } + sriov->vfs_ctx[vf].enabled = 0; + } + + mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); + + /* There are a number of scenarios when SRIOV is being disabled: + * 1. VFs or ECVFs had been created, and now set back to 0 (num_vf_change == true). + * - If EC SRIOV is enabled then this flow is happening on the + * embedded platform, wait for only EC VF pages. + * - If EC SRIOV is not enabled this flow is happening on non-embedded + * platform, wait for the VF pages. + * + * 2. The driver is being unloaded. In this case wait for all pages. + */ + if (num_vf_change) { + if (mlx5_core_ec_sriov_enabled(dev)) + wait_for_vf_pages = false; + else + wait_for_ec_vf_pages = false; + } + + if (wait_for_ec_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_EC_VF])) + mlx5_core_warn(dev, "timeout reclaiming EC VFs pages\n"); + + /* For ECPFs, skip waiting for host VF pages until ECPF is destroyed */ + if (mlx5_core_is_ecpf(dev)) + return; + + if (wait_for_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) + mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); +} + +static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devl_lock(devlink); + err = mlx5_device_enable_sriov(dev, num_vfs); + devl_unlock(devlink); + if (err) { + mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); + return err; + } + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); + mlx5_device_disable_sriov(dev, num_vfs, true, true); + } + return err; +} + +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); + int num_vfs = pci_num_vf(dev->pdev); + + pci_disable_sriov(pdev); + devl_lock(devlink); + mlx5_device_disable_sriov(dev, num_vfs, true, num_vf_change); + devl_unlock(devlink); +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err = 0; + + mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); + + if (num_vfs) + err = mlx5_sriov_enable(pdev, num_vfs); + else + mlx5_sriov_disable(pdev, true); + + if (!err) + sriov->num_vfs = num_vfs; + return err ? err : num_vfs; +} + +int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count) +{ + struct pci_dev *pf = pci_physfn(vf); + struct mlx5_core_sriov *sriov; + struct mlx5_core_dev *dev; + int num_vf_msix, id; + + dev = pci_get_drvdata(pf); + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return -EOPNOTSUPP; + + if (!msix_vec_count) + msix_vec_count = + mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf)); + + sriov = &dev->priv.sriov; + id = pci_iov_vf_id(vf); + if (id < 0 || !sriov->vfs_ctx[id].enabled) + return -EINVAL; + + return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count); +} + +int mlx5_sriov_attach(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev)) + return 0; + + /* If sriov VFs exist in PCI level, enable them in device level */ + return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev)); +} + +void mlx5_sriov_detach(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false, false); +} + +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u16 host_total_vfs; + const u32 *out; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + out = mlx5_esw_query_functions(dev); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. + */ + if (IS_ERR(out)) + goto done; + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + kvfree(out); + return host_total_vfs; + } + +done: + return pci_sriov_get_totalvfs(dev->pdev); +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int total_vfs, i; + + if (!mlx5_core_is_pf(dev)) + return 0; + + total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); + sriov->num_vfs = pci_num_vf(pdev); + sriov->max_ec_vfs = mlx5_core_ec_sriov_enabled(dev) ? pci_sriov_get_totalvfs(dev->pdev) : 0; + sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + for (i = 0; i < total_vfs; i++) + BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier); + + return 0; +} + +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + if (!mlx5_core_is_pf(dev)) + return; + + kfree(sriov->vfs_ctx); +} + +/** + * mlx5_sriov_blocking_notifier_unregister - Unregister a VF from + * a notification block chain. + * + * @mdev: The mlx5 core device. + * @vf_id: The VF id. + * @nb: The notifier block to be unregistered. + */ +void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev, + int vf_id, + struct notifier_block *nb) +{ + struct mlx5_vf_context *vfs_ctx; + struct mlx5_core_sriov *sriov; + + sriov = &mdev->priv.sriov; + if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs)) + return; + + vfs_ctx = &sriov->vfs_ctx[vf_id]; + blocking_notifier_chain_unregister(&vfs_ctx->notifier, nb); +} +EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister); + +/** + * mlx5_sriov_blocking_notifier_register - Register a VF notification + * block chain. + * + * @mdev: The mlx5 core device. + * @vf_id: The VF id. + * @nb: The notifier block to be called upon the VF events. + * + * Returns 0 on success or an error code. + */ +int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev, + int vf_id, + struct notifier_block *nb) +{ + struct mlx5_vf_context *vfs_ctx; + struct mlx5_core_sriov *sriov; + + sriov = &mdev->priv.sriov; + if (vf_id < 0 || vf_id >= sriov->num_vfs) + return -EINVAL; + + vfs_ctx = &sriov->vfs_ctx[vf_id]; + return blocking_notifier_chain_register(&vfs_ctx->notifier, nb); +} +EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile new file mode 100644 index 0000000000..c78512eed8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c new file mode 100644 index 0000000000..90c38cbbde --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -0,0 +1,2217 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" +#include "dr_ste.h" + +enum dr_action_domain { + DR_ACTION_DOMAIN_NIC_INGRESS, + DR_ACTION_DOMAIN_NIC_EGRESS, + DR_ACTION_DOMAIN_FDB_INGRESS, + DR_ACTION_DOMAIN_FDB_EGRESS, + DR_ACTION_DOMAIN_MAX, +}; + +enum dr_action_valid_state { + DR_ACTION_STATE_ERR, + DR_ACTION_STATE_NO_ACTION, + DR_ACTION_STATE_ENCAP, + DR_ACTION_STATE_DECAP, + DR_ACTION_STATE_MODIFY_HDR, + DR_ACTION_STATE_POP_VLAN, + DR_ACTION_STATE_PUSH_VLAN, + DR_ACTION_STATE_NON_TERM, + DR_ACTION_STATE_TERM, + DR_ACTION_STATE_ASO, + DR_ACTION_STATE_MAX, +}; + +static const char * const action_type_to_str[] = { + [DR_ACTION_TYP_TNL_L2_TO_L2] = "DR_ACTION_TYP_TNL_L2_TO_L2", + [DR_ACTION_TYP_L2_TO_TNL_L2] = "DR_ACTION_TYP_L2_TO_TNL_L2", + [DR_ACTION_TYP_TNL_L3_TO_L2] = "DR_ACTION_TYP_TNL_L3_TO_L2", + [DR_ACTION_TYP_L2_TO_TNL_L3] = "DR_ACTION_TYP_L2_TO_TNL_L3", + [DR_ACTION_TYP_DROP] = "DR_ACTION_TYP_DROP", + [DR_ACTION_TYP_QP] = "DR_ACTION_TYP_QP", + [DR_ACTION_TYP_FT] = "DR_ACTION_TYP_FT", + [DR_ACTION_TYP_CTR] = "DR_ACTION_TYP_CTR", + [DR_ACTION_TYP_TAG] = "DR_ACTION_TYP_TAG", + [DR_ACTION_TYP_MODIFY_HDR] = "DR_ACTION_TYP_MODIFY_HDR", + [DR_ACTION_TYP_VPORT] = "DR_ACTION_TYP_VPORT", + [DR_ACTION_TYP_POP_VLAN] = "DR_ACTION_TYP_POP_VLAN", + [DR_ACTION_TYP_PUSH_VLAN] = "DR_ACTION_TYP_PUSH_VLAN", + [DR_ACTION_TYP_SAMPLER] = "DR_ACTION_TYP_SAMPLER", + [DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR", + [DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR", + [DR_ACTION_TYP_ASO_FLOW_METER] = "DR_ACTION_TYP_ASO_FLOW_METER", + [DR_ACTION_TYP_RANGE] = "DR_ACTION_TYP_RANGE", + [DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN", +}; + +static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id) +{ + if (action_id > DR_ACTION_TYP_MAX) + action_id = DR_ACTION_TYP_MAX; + return action_type_to_str[action_id]; +} + +static const enum dr_action_valid_state +next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = { + [DR_ACTION_DOMAIN_NIC_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_DECAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ENCAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_POP_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_PUSH_VLAN] = { + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_NIC_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_DECAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ENCAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_POP_VLAN] = { + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_PUSH_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_INGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_DECAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ENCAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_POP_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_PUSH_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_TNL_L3_TO_L2] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, + [DR_ACTION_DOMAIN_FDB_EGRESS] = { + [DR_ACTION_STATE_NO_ACTION] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_DECAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ENCAP] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_MODIFY_HDR] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_POP_VLAN] = { + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_PUSH_VLAN] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_NON_TERM] = { + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_INSERT_HDR] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_REMOVE_HDR] = DR_ACTION_STATE_DECAP, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_ASO] = { + [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_L2_TO_TNL_L3] = DR_ACTION_STATE_ENCAP, + [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, + [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, + [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, + }, + [DR_ACTION_STATE_TERM] = { + [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, + }, + }, +}; + +static int +dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type, + enum mlx5dr_action_type *action_type) +{ + switch (reformat_type) { + case DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L2_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L2; + break; + case DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2: + *action_type = DR_ACTION_TYP_TNL_L3_TO_L2; + break; + case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3: + *action_type = DR_ACTION_TYP_L2_TO_TNL_L3; + break; + case DR_ACTION_REFORMAT_TYP_INSERT_HDR: + *action_type = DR_ACTION_TYP_INSERT_HDR; + break; + case DR_ACTION_REFORMAT_TYP_REMOVE_HDR: + *action_type = DR_ACTION_TYP_REMOVE_HDR; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* Apply the actions on the rule STE array starting from the last_ste. + * Actions might require more than one STE, new_num_stes will return + * the new size of the STEs array, rule with actions. + */ +static void dr_actions_apply(struct mlx5dr_domain *dmn, + enum mlx5dr_domain_nic_type nic_type, + u8 *action_type_set, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *new_num_stes) +{ + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + u32 added_stes = 0; + + if (nic_type == DR_DOMAIN_NIC_TYPE_RX) + mlx5dr_ste_set_actions_rx(ste_ctx, dmn, action_type_set, + last_ste, attr, &added_stes); + else + mlx5dr_ste_set_actions_tx(ste_ctx, dmn, action_type_set, + last_ste, attr, &added_stes); + + *new_num_stes += added_stes; +} + +static enum dr_action_domain +dr_action_get_action_domain(enum mlx5dr_domain_type domain, + enum mlx5dr_domain_nic_type nic_type) +{ + switch (domain) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + return DR_ACTION_DOMAIN_NIC_INGRESS; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + return DR_ACTION_DOMAIN_NIC_EGRESS; + case MLX5DR_DOMAIN_TYPE_FDB: + if (nic_type == DR_DOMAIN_NIC_TYPE_RX) + return DR_ACTION_DOMAIN_FDB_INGRESS; + return DR_ACTION_DOMAIN_FDB_EGRESS; + default: + WARN_ON(true); + return DR_ACTION_DOMAIN_MAX; + } +} + +static +int dr_action_validate_and_get_next_state(enum dr_action_domain action_domain, + u32 action_type, + u32 *state) +{ + u32 cur_state = *state; + + /* Check action state machine is valid */ + *state = next_action_state[action_domain][cur_state][action_type]; + + if (*state == DR_ACTION_STATE_ERR) + return -EOPNOTSUPP; + + return 0; +} + +static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, + struct mlx5dr_action *dest_action, + u64 *final_icm_addr) +{ + int ret; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_FT: + /* Allow destination flow table only if table is a terminating + * table, since there is an *assumption* that in such case FW + * will recalculate the CS. + */ + if (dest_action->dest_tbl->is_fw_tbl) { + *final_icm_addr = dest_action->dest_tbl->fw_tbl.rx_icm_addr; + } else { + mlx5dr_dbg(dmn, + "Destination FT should be terminating when modify TTL is used\n"); + return -EINVAL; + } + break; + + case DR_ACTION_TYP_VPORT: + /* If destination is vport we will get the FW flow table + * that recalculates the CS and forwards to the vport. + */ + ret = mlx5dr_domain_get_recalc_cs_ft_addr(dest_action->vport->dmn, + dest_action->vport->caps->num, + final_icm_addr); + if (ret) { + mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n"); + return ret; + } + break; + + default: + break; + } + + return 0; +} + +static void dr_action_modify_ttl_adjust(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_actions_attr *attr, + bool rx_rule, + bool *recalc_cs_required) +{ + *recalc_cs_required = false; + + /* if device supports csum recalculation - no adjustment needed */ + if (mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps)) + return; + + /* no adjustment needed on TX rules */ + if (!rx_rule) + return; + + if (!MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify)) { + /* Ignore the modify TTL action. + * It is always kept as last HW action. + */ + attr->modify_actions--; + return; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + /* Due to a HW bug on some devices, modifying TTL on RX flows + * will cause an incorrect checksum calculation. In such cases + * we will use a FW table to recalculate the checksum. + */ + *recalc_cs_required = true; +} + +static void dr_action_print_sequence(struct mlx5dr_domain *dmn, + struct mlx5dr_action *actions[], + int last_idx) +{ + int i; + + for (i = 0; i <= last_idx; i++) + mlx5dr_err(dmn, "< %s (%d) > ", + dr_action_id_to_str(actions[i]->action_type), + actions[i]->action_type); +} + +static int dr_action_get_dest_fw_tbl_addr(struct mlx5dr_matcher *matcher, + struct mlx5dr_action_dest_tbl *dest_tbl, + bool is_rx_rule, + u64 *final_icm_addr) +{ + struct mlx5dr_cmd_query_flow_table_details output; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + int ret; + + if (!dest_tbl->fw_tbl.rx_icm_addr) { + ret = mlx5dr_cmd_query_flow_table(dmn->mdev, + dest_tbl->fw_tbl.type, + dest_tbl->fw_tbl.id, + &output); + if (ret) { + mlx5dr_err(dmn, + "Failed mlx5_cmd_query_flow_table ret: %d\n", + ret); + return ret; + } + + dest_tbl->fw_tbl.tx_icm_addr = output.sw_owner_icm_root_1; + dest_tbl->fw_tbl.rx_icm_addr = output.sw_owner_icm_root_0; + } + + *final_icm_addr = is_rx_rule ? dest_tbl->fw_tbl.rx_icm_addr : + dest_tbl->fw_tbl.tx_icm_addr; + return 0; +} + +static int dr_action_get_dest_sw_tbl_addr(struct mlx5dr_matcher *matcher, + struct mlx5dr_action_dest_tbl *dest_tbl, + bool is_rx_rule, + u64 *final_icm_addr) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_icm_chunk *chunk; + + if (dest_tbl->tbl->dmn != dmn) { + mlx5dr_err(dmn, + "Destination table belongs to a different domain\n"); + return -EINVAL; + } + + if (dest_tbl->tbl->level <= matcher->tbl->level) { + mlx5_core_dbg_once(dmn->mdev, + "Connecting table to a lower/same level destination table\n"); + mlx5dr_dbg(dmn, + "Connecting table at level %d to a destination table at level %d\n", + matcher->tbl->level, + dest_tbl->tbl->level); + } + + chunk = is_rx_rule ? dest_tbl->tbl->rx.s_anchor->chunk : + dest_tbl->tbl->tx.s_anchor->chunk; + + *final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); + return 0; +} + +static int dr_action_get_dest_tbl_addr(struct mlx5dr_matcher *matcher, + struct mlx5dr_action_dest_tbl *dest_tbl, + bool is_rx_rule, + u64 *final_icm_addr) +{ + if (dest_tbl->is_fw_tbl) + return dr_action_get_dest_fw_tbl_addr(matcher, + dest_tbl, + is_rx_rule, + final_icm_addr); + + return dr_action_get_dest_sw_tbl_addr(matcher, + dest_tbl, + is_rx_rule, + final_icm_addr); +} + +#define WITH_VLAN_NUM_HW_ACTIONS 6 + +int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + bool rx_rule = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + u8 action_type_set[DR_ACTION_TYP_MAX] = {}; + struct mlx5dr_ste_actions_attr attr = {}; + struct mlx5dr_action *dest_action = NULL; + u32 state = DR_ACTION_STATE_NO_ACTION; + enum dr_action_domain action_domain; + bool recalc_cs_required = false; + u8 *last_ste; + int i, ret; + + attr.gvmi = dmn->info.caps.gvmi; + attr.hit_gvmi = dmn->info.caps.gvmi; + attr.final_icm_addr = nic_dmn->default_icm_addr; + action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->type); + + for (i = 0; i < num_actions; i++) { + struct mlx5dr_action *action; + int max_actions_type = 1; + u32 action_type; + + action = actions[i]; + action_type = action->action_type; + + switch (action_type) { + case DR_ACTION_TYP_DROP: + attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; + break; + case DR_ACTION_TYP_FT: + dest_action = action; + ret = dr_action_get_dest_tbl_addr(matcher, action->dest_tbl, + rx_rule, &attr.final_icm_addr); + if (ret) + return ret; + break; + case DR_ACTION_TYP_RANGE: + ret = dr_action_get_dest_tbl_addr(matcher, + action->range->hit_tbl_action->dest_tbl, + rx_rule, &attr.final_icm_addr); + if (ret) + return ret; + + ret = dr_action_get_dest_tbl_addr(matcher, + action->range->miss_tbl_action->dest_tbl, + rx_rule, &attr.range.miss_icm_addr); + if (ret) + return ret; + + attr.range.definer_id = action->range->definer_id; + attr.range.min = action->range->min; + attr.range.max = action->range->max; + break; + case DR_ACTION_TYP_QP: + mlx5dr_info(dmn, "Domain doesn't support QP\n"); + return -EOPNOTSUPP; + case DR_ACTION_TYP_CTR: + attr.ctr_id = action->ctr->ctr_id + + action->ctr->offset; + break; + case DR_ACTION_TYP_TAG: + attr.flow_tag = action->flow_tag->flow_tag; + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + if (action->rewrite->ptrn && action->rewrite->arg) { + attr.decap_index = mlx5dr_arg_get_obj_id(action->rewrite->arg); + attr.decap_actions = action->rewrite->ptrn->num_of_actions; + attr.decap_pat_idx = action->rewrite->ptrn->index; + } else { + attr.decap_index = action->rewrite->index; + attr.decap_actions = action->rewrite->num_of_actions; + attr.decap_with_vlan = + attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; + attr.decap_pat_idx = MLX5DR_INVALID_PATTERN_INDEX; + } + break; + case DR_ACTION_TYP_MODIFY_HDR: + if (action->rewrite->single_action_opt) { + attr.modify_actions = action->rewrite->num_of_actions; + attr.single_modify_action = action->rewrite->data; + } else { + if (action->rewrite->ptrn && action->rewrite->arg) { + attr.modify_index = + mlx5dr_arg_get_obj_id(action->rewrite->arg); + attr.modify_actions = action->rewrite->ptrn->num_of_actions; + attr.modify_pat_idx = action->rewrite->ptrn->index; + } else { + attr.modify_index = action->rewrite->index; + attr.modify_actions = action->rewrite->num_of_actions; + attr.modify_pat_idx = MLX5DR_INVALID_PATTERN_INDEX; + } + } + if (action->rewrite->modify_ttl) + dr_action_modify_ttl_adjust(dmn, &attr, rx_rule, + &recalc_cs_required); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + if (rx_rule && + !(dmn->ste_ctx->actions_caps & DR_STE_CTX_ACTION_CAP_RX_ENCAP)) { + mlx5dr_info(dmn, "Device doesn't support Encap on RX\n"); + return -EOPNOTSUPP; + } + attr.reformat.size = action->reformat->size; + attr.reformat.id = action->reformat->id; + break; + case DR_ACTION_TYP_SAMPLER: + attr.final_icm_addr = rx_rule ? action->sampler->rx_icm_addr : + action->sampler->tx_icm_addr; + break; + case DR_ACTION_TYP_VPORT: + if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) { + /* can't go to uplink on RX rule - dropping instead */ + attr.final_icm_addr = nic_dmn->drop_icm_addr; + attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48; + } else { + attr.hit_gvmi = action->vport->caps->vhca_gvmi; + dest_action = action; + attr.final_icm_addr = rx_rule ? + action->vport->caps->icm_address_rx : + action->vport->caps->icm_address_tx; + } + break; + case DR_ACTION_TYP_POP_VLAN: + if (!rx_rule && !(dmn->ste_ctx->actions_caps & + DR_STE_CTX_ACTION_CAP_TX_POP)) { + mlx5dr_dbg(dmn, "Device doesn't support POP VLAN action on TX\n"); + return -EOPNOTSUPP; + } + + max_actions_type = MLX5DR_MAX_VLANS; + attr.vlans.count++; + break; + case DR_ACTION_TYP_PUSH_VLAN: + if (rx_rule && !(dmn->ste_ctx->actions_caps & + DR_STE_CTX_ACTION_CAP_RX_PUSH)) { + mlx5dr_dbg(dmn, "Device doesn't support PUSH VLAN action on RX\n"); + return -EOPNOTSUPP; + } + + max_actions_type = MLX5DR_MAX_VLANS; + if (attr.vlans.count == MLX5DR_MAX_VLANS) { + mlx5dr_dbg(dmn, "Max VLAN push/pop count exceeded\n"); + return -EINVAL; + } + + attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr; + break; + case DR_ACTION_TYP_INSERT_HDR: + case DR_ACTION_TYP_REMOVE_HDR: + attr.reformat.size = action->reformat->size; + attr.reformat.id = action->reformat->id; + attr.reformat.param_0 = action->reformat->param_0; + attr.reformat.param_1 = action->reformat->param_1; + break; + case DR_ACTION_TYP_ASO_FLOW_METER: + attr.aso_flow_meter.obj_id = action->aso->obj_id; + attr.aso_flow_meter.offset = action->aso->offset; + attr.aso_flow_meter.dest_reg_id = action->aso->dest_reg_id; + attr.aso_flow_meter.init_color = action->aso->init_color; + break; + default: + mlx5dr_err(dmn, "Unsupported action type %d\n", action_type); + return -EINVAL; + } + + /* Check action duplication */ + if (++action_type_set[action_type] > max_actions_type) { + mlx5dr_err(dmn, "Action type %d supports only max %d time(s)\n", + action_type, max_actions_type); + return -EINVAL; + } + + /* Check action state machine is valid */ + if (dr_action_validate_and_get_next_state(action_domain, + action_type, + &state)) { + mlx5dr_err(dmn, "Invalid action (gvmi: %d, is_rx: %d) sequence provided:", + attr.gvmi, rx_rule); + dr_action_print_sequence(dmn, actions, i); + return -EOPNOTSUPP; + } + } + + *new_hw_ste_arr_sz = nic_matcher->num_of_builders; + last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1); + + if (recalc_cs_required && dest_action) { + ret = dr_action_handle_cs_recalc(dmn, dest_action, &attr.final_icm_addr); + if (ret) { + mlx5dr_err(dmn, + "Failed to handle checksum recalculation err %d\n", + ret); + return ret; + } + } + + dr_actions_apply(dmn, + nic_dmn->type, + action_type_set, + last_ste, + &attr, + new_hw_ste_arr_sz); + + return 0; +} + +static unsigned int action_size[DR_ACTION_TYP_MAX] = { + [DR_ACTION_TYP_TNL_L2_TO_L2] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_L2_TO_TNL_L2] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_TNL_L3_TO_L2] = sizeof(struct mlx5dr_action_rewrite), + [DR_ACTION_TYP_L2_TO_TNL_L3] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_FT] = sizeof(struct mlx5dr_action_dest_tbl), + [DR_ACTION_TYP_CTR] = sizeof(struct mlx5dr_action_ctr), + [DR_ACTION_TYP_TAG] = sizeof(struct mlx5dr_action_flow_tag), + [DR_ACTION_TYP_MODIFY_HDR] = sizeof(struct mlx5dr_action_rewrite), + [DR_ACTION_TYP_VPORT] = sizeof(struct mlx5dr_action_vport), + [DR_ACTION_TYP_PUSH_VLAN] = sizeof(struct mlx5dr_action_push_vlan), + [DR_ACTION_TYP_INSERT_HDR] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler), + [DR_ACTION_TYP_ASO_FLOW_METER] = sizeof(struct mlx5dr_action_aso_flow_meter), + [DR_ACTION_TYP_RANGE] = sizeof(struct mlx5dr_action_range), +}; + +static struct mlx5dr_action * +dr_action_create_generic(enum mlx5dr_action_type action_type) +{ + struct mlx5dr_action *action; + int extra_size; + + if (action_type < DR_ACTION_TYP_MAX) + extra_size = action_size[action_type]; + else + return NULL; + + action = kzalloc(sizeof(*action) + extra_size, GFP_KERNEL); + if (!action) + return NULL; + + action->action_type = action_type; + refcount_set(&action->refcount, 1); + action->data = action + 1; + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_drop(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_DROP); +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl->is_fw_tbl = true; + action->dest_tbl->fw_tbl.dmn = dmn; + action->dest_tbl->fw_tbl.id = table_num; + action->dest_tbl->fw_tbl.type = FS_FT_FDB; + refcount_inc(&dmn->refcount); + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) +{ + struct mlx5dr_action *action; + + refcount_inc(&tbl->refcount); + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto dec_ref; + + action->dest_tbl->tbl = tbl; + + return action; + +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +static void dr_action_range_definer_fill(u16 *format_id, + u8 *dw_selectors, + u8 *byte_selectors, + u8 *match_mask) +{ + int i; + + *format_id = MLX5_IFC_DEFINER_FORMAT_ID_SELECT; + + dw_selectors[0] = MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN / 4; + + for (i = 1; i < MLX5_IFC_DEFINER_DW_SELECTORS_NUM; i++) + dw_selectors[i] = MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED; + + for (i = 0; i < MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM; i++) + byte_selectors[i] = MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED; + + MLX5_SET(match_definer_match_mask, match_mask, + match_dw_0, 0xffffUL << 16); +} + +static int dr_action_create_range_definer(struct mlx5dr_action *action) +{ + u8 match_mask[MLX5_FLD_SZ_BYTES(match_definer, match_mask)] = {}; + u8 byte_selectors[MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM] = {}; + u8 dw_selectors[MLX5_IFC_DEFINER_DW_SELECTORS_NUM] = {}; + struct mlx5dr_domain *dmn = action->range->dmn; + u32 definer_id; + u16 format_id; + int ret; + + dr_action_range_definer_fill(&format_id, + dw_selectors, + byte_selectors, + match_mask); + + ret = mlx5dr_definer_get(dmn, format_id, + dw_selectors, byte_selectors, + match_mask, &definer_id); + if (ret) + return ret; + + action->range->definer_id = definer_id; + return 0; +} + +static void dr_action_destroy_range_definer(struct mlx5dr_action *action) +{ + mlx5dr_definer_put(action->range->dmn, action->range->definer_id); +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn, + u32 field, + struct mlx5_flow_table *hit_ft, + struct mlx5_flow_table *miss_ft, + u32 min, + u32 max) +{ + struct mlx5dr_action *action; + int ret; + + if (!mlx5dr_supp_match_ranges(dmn->mdev)) { + mlx5dr_dbg(dmn, "SELECT definer support is needed for match range\n"); + return NULL; + } + + if (field != MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN || + min > 0xffff || max > 0xffff) { + mlx5dr_err(dmn, "Invalid match range parameters\n"); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_RANGE); + if (!action) + return NULL; + + action->range->hit_tbl_action = + mlx5dr_is_fw_table(hit_ft) ? + mlx5dr_action_create_dest_flow_fw_table(dmn, hit_ft) : + mlx5dr_action_create_dest_table(hit_ft->fs_dr_table.dr_table); + + if (!action->range->hit_tbl_action) + goto free_action; + + action->range->miss_tbl_action = + mlx5dr_is_fw_table(miss_ft) ? + mlx5dr_action_create_dest_flow_fw_table(dmn, miss_ft) : + mlx5dr_action_create_dest_table(miss_ft->fs_dr_table.dr_table); + + if (!action->range->miss_tbl_action) + goto free_hit_tbl_action; + + action->range->min = min; + action->range->max = max; + action->range->dmn = dmn; + + ret = dr_action_create_range_definer(action); + if (ret) + goto free_miss_tbl_action; + + /* No need to increase refcount on domain for this action, + * the hit/miss table actions will do it internally. + */ + + return action; + +free_miss_tbl_action: + mlx5dr_action_destroy(action->range->miss_tbl_action); +free_hit_tbl_action: + mlx5dr_action_destroy(action->range->hit_tbl_action); +free_action: + kfree(action); + + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests, + bool ignore_flow_level, + u32 flow_source) +{ + struct mlx5dr_cmd_flow_destination_hw_info *hw_dests; + struct mlx5dr_action **ref_actions; + struct mlx5dr_action *action; + bool reformat_req = false; + u32 num_of_ref = 0; + u32 ref_act_cnt; + int ret; + int i; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_err(dmn, "Multiple destination support is for FDB only\n"); + return NULL; + } + + hw_dests = kcalloc(num_of_dests, sizeof(*hw_dests), GFP_KERNEL); + if (!hw_dests) + return NULL; + + if (unlikely(check_mul_overflow(num_of_dests, 2u, &ref_act_cnt))) + goto free_hw_dests; + + ref_actions = kcalloc(ref_act_cnt, sizeof(*ref_actions), GFP_KERNEL); + if (!ref_actions) + goto free_hw_dests; + + for (i = 0; i < num_of_dests; i++) { + struct mlx5dr_action *reformat_action = dests[i].reformat; + struct mlx5dr_action *dest_action = dests[i].dest; + + ref_actions[num_of_ref++] = dest_action; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_VPORT: + hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + hw_dests[i].vport.num = dest_action->vport->caps->num; + hw_dests[i].vport.vhca_id = dest_action->vport->caps->vhca_gvmi; + if (reformat_action) { + reformat_req = true; + hw_dests[i].vport.reformat_id = + reformat_action->reformat->id; + ref_actions[num_of_ref++] = reformat_action; + hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + } + break; + + case DR_ACTION_TYP_FT: + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (dest_action->dest_tbl->is_fw_tbl) + hw_dests[i].ft_id = dest_action->dest_tbl->fw_tbl.id; + else + hw_dests[i].ft_id = dest_action->dest_tbl->tbl->table_id; + break; + + default: + mlx5dr_dbg(dmn, "Invalid multiple destinations action\n"); + goto free_ref_actions; + } + } + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto free_ref_actions; + + ret = mlx5dr_fw_create_md_tbl(dmn, + hw_dests, + num_of_dests, + reformat_req, + &action->dest_tbl->fw_tbl.id, + &action->dest_tbl->fw_tbl.group_id, + ignore_flow_level, + flow_source); + if (ret) + goto free_action; + + refcount_inc(&dmn->refcount); + + for (i = 0; i < num_of_ref; i++) + refcount_inc(&ref_actions[i]->refcount); + + action->dest_tbl->is_fw_tbl = true; + action->dest_tbl->fw_tbl.dmn = dmn; + action->dest_tbl->fw_tbl.type = FS_FT_FDB; + action->dest_tbl->fw_tbl.ref_actions = ref_actions; + action->dest_tbl->fw_tbl.num_of_ref_actions = num_of_ref; + + kfree(hw_dests); + + return action; + +free_action: + kfree(action); +free_ref_actions: + kfree(ref_actions); +free_hw_dests: + kfree(hw_dests); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn, + struct mlx5_flow_table *ft) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + return NULL; + + action->dest_tbl->is_fw_tbl = 1; + action->dest_tbl->fw_tbl.type = ft->type; + action->dest_tbl->fw_tbl.id = ft->id; + action->dest_tbl->fw_tbl.dmn = dmn; + + refcount_inc(&dmn->refcount); + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_CTR); + if (!action) + return NULL; + + action->ctr->ctr_id = counter_id; + + return action; +} + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value) +{ + struct mlx5dr_action *action; + + action = dr_action_create_generic(DR_ACTION_TYP_TAG); + if (!action) + return NULL; + + action->flow_tag->flow_tag = tag_value & 0xffffff; + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id) +{ + struct mlx5dr_action *action; + u64 icm_rx, icm_tx; + int ret; + + ret = mlx5dr_cmd_query_flow_sampler(dmn->mdev, sampler_id, + &icm_rx, &icm_tx); + if (ret) + return NULL; + + action = dr_action_create_generic(DR_ACTION_TYP_SAMPLER); + if (!action) + return NULL; + + action->sampler->dmn = dmn; + action->sampler->sampler_id = sampler_id; + action->sampler->rx_icm_addr = icm_rx; + action->sampler->tx_icm_addr = icm_tx; + + refcount_inc(&dmn->refcount); + return action; +} + +static int +dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type, + struct mlx5dr_domain *dmn, + u8 reformat_param_0, + u8 reformat_param_1, + size_t data_sz, + void *data) +{ + if (reformat_type == DR_ACTION_TYP_INSERT_HDR) { + if ((!data && data_sz) || (data && !data_sz) || + MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_size) < data_sz || + MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_offset) < reformat_param_1) { + mlx5dr_dbg(dmn, "Invalid reformat parameters for INSERT_HDR\n"); + goto out_err; + } + } else if (reformat_type == DR_ACTION_TYP_REMOVE_HDR) { + if (data || + MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_size) < data_sz || + MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_offset) < reformat_param_1) { + mlx5dr_dbg(dmn, "Invalid reformat parameters for REMOVE_HDR\n"); + goto out_err; + } + } else if (reformat_param_0 || reformat_param_1 || + reformat_type > DR_ACTION_TYP_REMOVE_HDR) { + mlx5dr_dbg(dmn, "Invalid reformat parameters\n"); + goto out_err; + } + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB) + return 0; + + if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { + if (reformat_type != DR_ACTION_TYP_TNL_L2_TO_L2 && + reformat_type != DR_ACTION_TYP_TNL_L3_TO_L2) { + mlx5dr_dbg(dmn, "Action reformat type not support on RX domain\n"); + goto out_err; + } + } else if (dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { + if (reformat_type != DR_ACTION_TYP_L2_TO_TNL_L2 && + reformat_type != DR_ACTION_TYP_L2_TO_TNL_L3) { + mlx5dr_dbg(dmn, "Action reformat type not support on TX domain\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -EINVAL; +} + +static int +dr_action_create_reformat_action(struct mlx5dr_domain *dmn, + u8 reformat_param_0, u8 reformat_param_1, + size_t data_sz, void *data, + struct mlx5dr_action *action) +{ + u32 reformat_id; + int ret; + + switch (action->action_type) { + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + { + enum mlx5_reformat_ctx_type rt; + + if (action->action_type == DR_ACTION_TYP_L2_TO_TNL_L2) + rt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL; + else + rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL; + + ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, 0, 0, + data_sz, data, + &reformat_id); + if (ret) + return ret; + + action->reformat->id = reformat_id; + action->reformat->size = data_sz; + return 0; + } + case DR_ACTION_TYP_TNL_L2_TO_L2: + { + return 0; + } + case DR_ACTION_TYP_TNL_L3_TO_L2: + { + u8 *hw_actions; + + hw_actions = kzalloc(DR_ACTION_CACHE_LINE_SIZE, GFP_KERNEL); + if (!hw_actions) + return -ENOMEM; + + ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx, + data, data_sz, + hw_actions, + DR_ACTION_CACHE_LINE_SIZE, + &action->rewrite->num_of_actions); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n"); + kfree(hw_actions); + return ret; + } + + action->rewrite->data = hw_actions; + action->rewrite->dmn = dmn; + + ret = mlx5dr_ste_alloc_modify_hdr(action); + if (ret) { + mlx5dr_dbg(dmn, "Failed preparing reformat data\n"); + kfree(hw_actions); + return ret; + } + return 0; + } + case DR_ACTION_TYP_INSERT_HDR: + ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, + MLX5_REFORMAT_TYPE_INSERT_HDR, + reformat_param_0, + reformat_param_1, + data_sz, data, + &reformat_id); + if (ret) + return ret; + + action->reformat->id = reformat_id; + action->reformat->size = data_sz; + action->reformat->param_0 = reformat_param_0; + action->reformat->param_1 = reformat_param_1; + return 0; + case DR_ACTION_TYP_REMOVE_HDR: + action->reformat->id = 0; + action->reformat->size = data_sz; + action->reformat->param_0 = reformat_param_0; + action->reformat->param_1 = reformat_param_1; + return 0; + default: + mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type); + return -EINVAL; + } +} + +#define CVLAN_ETHERTYPE 0x8100 +#define SVLAN_ETHERTYPE 0x88a8 + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void) +{ + return dr_action_create_generic(DR_ACTION_TYP_POP_VLAN); +} + +struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn, + __be32 vlan_hdr) +{ + u32 vlan_hdr_h = ntohl(vlan_hdr); + u16 ethertype = vlan_hdr_h >> 16; + struct mlx5dr_action *action; + + if (ethertype != SVLAN_ETHERTYPE && ethertype != CVLAN_ETHERTYPE) { + mlx5dr_dbg(dmn, "Invalid vlan ethertype\n"); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_PUSH_VLAN); + if (!action) + return NULL; + + action->push_vlan->vlan_hdr = vlan_hdr_h; + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + u8 reformat_param_0, + u8 reformat_param_1, + size_t data_sz, + void *data) +{ + enum mlx5dr_action_type action_type; + struct mlx5dr_action *action; + int ret; + + refcount_inc(&dmn->refcount); + + /* General checks */ + ret = dr_action_reformat_to_action_type(reformat_type, &action_type); + if (ret) { + mlx5dr_dbg(dmn, "Invalid reformat_type provided\n"); + goto dec_ref; + } + + ret = dr_action_verify_reformat_params(action_type, dmn, + reformat_param_0, reformat_param_1, + data_sz, data); + if (ret) + goto dec_ref; + + action = dr_action_create_generic(action_type); + if (!action) + goto dec_ref; + + action->reformat->dmn = dmn; + + ret = dr_action_create_reformat_action(dmn, + reformat_param_0, + reformat_param_1, + data_sz, + data, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating reformat action %d\n", ret); + goto free_action; + } + + return action; + +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +static int +dr_action_modify_sw_to_hw_add(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct mlx5dr_ste_action_modify_field **ret_hw_info) +{ + const struct mlx5dr_ste_action_modify_field *hw_action_info; + u8 max_length; + u16 sw_field; + u32 data; + + /* Get SW modify action data */ + sw_field = MLX5_GET(set_action_in, sw_action, field); + data = MLX5_GET(set_action_in, sw_action, data); + + /* Convert SW data to HW modify action format */ + hw_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, sw_field); + if (!hw_action_info) { + mlx5dr_dbg(dmn, "Modify add action invalid field given\n"); + return -EINVAL; + } + + max_length = hw_action_info->end - hw_action_info->start + 1; + + mlx5dr_ste_set_action_add(dmn->ste_ctx, + hw_action, + hw_action_info->hw_field, + hw_action_info->start, + max_length, + data); + + *ret_hw_info = hw_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw_set(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct mlx5dr_ste_action_modify_field **ret_hw_info) +{ + const struct mlx5dr_ste_action_modify_field *hw_action_info; + u8 offset, length, max_length; + u16 sw_field; + u32 data; + + /* Get SW modify action data */ + length = MLX5_GET(set_action_in, sw_action, length); + offset = MLX5_GET(set_action_in, sw_action, offset); + sw_field = MLX5_GET(set_action_in, sw_action, field); + data = MLX5_GET(set_action_in, sw_action, data); + + /* Convert SW data to HW modify action format */ + hw_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, sw_field); + if (!hw_action_info) { + mlx5dr_dbg(dmn, "Modify set action invalid field given\n"); + return -EINVAL; + } + + /* PRM defines that length zero specific length of 32bits */ + length = length ? length : 32; + + max_length = hw_action_info->end - hw_action_info->start + 1; + + if (length + offset > max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; + } + + mlx5dr_ste_set_action_set(dmn->ste_ctx, + hw_action, + hw_action_info->hw_field, + hw_action_info->start + offset, + length, + data); + + *ret_hw_info = hw_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw_copy(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct mlx5dr_ste_action_modify_field **ret_dst_hw_info, + const struct mlx5dr_ste_action_modify_field **ret_src_hw_info) +{ + u8 src_offset, dst_offset, src_max_length, dst_max_length, length; + const struct mlx5dr_ste_action_modify_field *hw_dst_action_info; + const struct mlx5dr_ste_action_modify_field *hw_src_action_info; + u16 src_field, dst_field; + + /* Get SW modify action data */ + src_field = MLX5_GET(copy_action_in, sw_action, src_field); + dst_field = MLX5_GET(copy_action_in, sw_action, dst_field); + src_offset = MLX5_GET(copy_action_in, sw_action, src_offset); + dst_offset = MLX5_GET(copy_action_in, sw_action, dst_offset); + length = MLX5_GET(copy_action_in, sw_action, length); + + /* Convert SW data to HW modify action format */ + hw_src_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, src_field); + hw_dst_action_info = mlx5dr_ste_conv_modify_hdr_sw_field(dmn->ste_ctx, dst_field); + if (!hw_src_action_info || !hw_dst_action_info) { + mlx5dr_dbg(dmn, "Modify copy action invalid field given\n"); + return -EINVAL; + } + + /* PRM defines that length zero specific length of 32bits */ + length = length ? length : 32; + + src_max_length = hw_src_action_info->end - + hw_src_action_info->start + 1; + dst_max_length = hw_dst_action_info->end - + hw_dst_action_info->start + 1; + + if (length + src_offset > src_max_length || + length + dst_offset > dst_max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; + } + + mlx5dr_ste_set_action_copy(dmn->ste_ctx, + hw_action, + hw_dst_action_info->hw_field, + hw_dst_action_info->start + dst_offset, + length, + hw_src_action_info->hw_field, + hw_src_action_info->start + src_offset); + + *ret_dst_hw_info = hw_dst_action_info; + *ret_src_hw_info = hw_src_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct mlx5dr_ste_action_modify_field **ret_dst_hw_info, + const struct mlx5dr_ste_action_modify_field **ret_src_hw_info) +{ + u8 action; + int ret; + + *hw_action = 0; + *ret_src_hw_info = NULL; + + /* Get SW modify action type */ + action = MLX5_GET(set_action_in, sw_action, action_type); + + switch (action) { + case MLX5_ACTION_TYPE_SET: + ret = dr_action_modify_sw_to_hw_set(dmn, sw_action, + hw_action, + ret_dst_hw_info); + break; + + case MLX5_ACTION_TYPE_ADD: + ret = dr_action_modify_sw_to_hw_add(dmn, sw_action, + hw_action, + ret_dst_hw_info); + break; + + case MLX5_ACTION_TYPE_COPY: + ret = dr_action_modify_sw_to_hw_copy(dmn, sw_action, + hw_action, + ret_dst_hw_info, + ret_src_hw_info); + break; + + default: + mlx5dr_info(dmn, "Unsupported action_type for modify action\n"); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int +dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + struct mlx5dr_domain *dmn = action->rewrite->dmn; + + if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + action->rewrite->allow_rx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { + mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", + sw_field); + return -EINVAL; + } + } else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + action->rewrite->allow_tx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { + mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", + sw_field); + return -EINVAL; + } + } + + if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) { + mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n"); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + struct mlx5dr_domain *dmn = action->rewrite->dmn; + + if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL && + sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) { + mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", + sw_field); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + u16 sw_fields[2]; + int i; + + sw_fields[0] = MLX5_GET(copy_action_in, sw_action, src_field); + sw_fields[1] = MLX5_GET(copy_action_in, sw_action, dst_field); + + for (i = 0; i < 2; i++) { + if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + action->rewrite->allow_rx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { + mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", + sw_fields[i]); + return -EINVAL; + } + } else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + action->rewrite->allow_tx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { + mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", + sw_fields[i]); + return -EINVAL; + } + } + } + + if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) { + mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n"); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + u8 action_type; + int ret; + + action_type = MLX5_GET(set_action_in, sw_action, action_type); + + switch (action_type) { + case MLX5_ACTION_TYPE_SET: + ret = dr_action_modify_check_set_field_limitation(action, + sw_action); + break; + + case MLX5_ACTION_TYPE_ADD: + ret = dr_action_modify_check_add_field_limitation(action, + sw_action); + break; + + case MLX5_ACTION_TYPE_COPY: + ret = dr_action_modify_check_copy_field_limitation(action, + sw_action); + break; + + default: + mlx5dr_info(dmn, "Unsupported action %d modify action\n", + action_type); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static bool +dr_action_modify_check_is_ttl_modify(const void *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + + return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL; +} + +static int dr_actions_convert_modify_header(struct mlx5dr_action *action, + u32 max_hw_actions, + u32 num_sw_actions, + __be64 sw_actions[], + __be64 hw_actions[], + u32 *num_hw_actions, + bool *modify_ttl) +{ + const struct mlx5dr_ste_action_modify_field *hw_dst_action_info; + const struct mlx5dr_ste_action_modify_field *hw_src_action_info; + struct mlx5dr_domain *dmn = action->rewrite->dmn; + __be64 *modify_ttl_sw_action = NULL; + int ret, i, hw_idx = 0; + __be64 *sw_action; + __be64 hw_action; + u16 hw_field = 0; + u32 l3_type = 0; + u32 l4_type = 0; + + *modify_ttl = false; + + action->rewrite->allow_rx = 1; + action->rewrite->allow_tx = 1; + + for (i = 0; i < num_sw_actions || modify_ttl_sw_action; i++) { + /* modify TTL is handled separately, as a last action */ + if (i == num_sw_actions) { + sw_action = modify_ttl_sw_action; + modify_ttl_sw_action = NULL; + } else { + sw_action = &sw_actions[i]; + } + + ret = dr_action_modify_check_field_limitation(action, + sw_action); + if (ret) + return ret; + + if (!(*modify_ttl) && + dr_action_modify_check_is_ttl_modify(sw_action)) { + modify_ttl_sw_action = sw_action; + *modify_ttl = true; + continue; + } + + /* Convert SW action to HW action */ + ret = dr_action_modify_sw_to_hw(dmn, + sw_action, + &hw_action, + &hw_dst_action_info, + &hw_src_action_info); + if (ret) + return ret; + + /* Due to a HW limitation we cannot modify 2 different L3 types */ + if (l3_type && hw_dst_action_info->l3_type && + hw_dst_action_info->l3_type != l3_type) { + mlx5dr_dbg(dmn, "Action list can't support two different L3 types\n"); + return -EINVAL; + } + if (hw_dst_action_info->l3_type) + l3_type = hw_dst_action_info->l3_type; + + /* Due to a HW limitation we cannot modify two different L4 types */ + if (l4_type && hw_dst_action_info->l4_type && + hw_dst_action_info->l4_type != l4_type) { + mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n"); + return -EINVAL; + } + if (hw_dst_action_info->l4_type) + l4_type = hw_dst_action_info->l4_type; + + /* HW reads and executes two actions at once this means we + * need to create a gap if two actions access the same field + */ + if ((hw_idx % 2) && (hw_field == hw_dst_action_info->hw_field || + (hw_src_action_info && + hw_field == hw_src_action_info->hw_field))) { + /* Check if after gap insertion the total number of HW + * modify actions doesn't exceeds the limit + */ + hw_idx++; + if (hw_idx >= max_hw_actions) { + mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n"); + return -EINVAL; + } + } + hw_field = hw_dst_action_info->hw_field; + + hw_actions[hw_idx] = hw_action; + hw_idx++; + } + + /* if the resulting HW actions list is empty, add NOP action */ + if (!hw_idx) + hw_idx++; + + *num_hw_actions = hw_idx; + + return 0; +} + +static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, + size_t actions_sz, + __be64 actions[], + struct mlx5dr_action *action) +{ + u32 max_hw_actions; + u32 num_hw_actions; + u32 num_sw_actions; + __be64 *hw_actions; + bool modify_ttl; + int ret; + + num_sw_actions = actions_sz / DR_MODIFY_ACTION_SIZE; + max_hw_actions = mlx5dr_icm_pool_chunk_size_to_entries(DR_CHUNK_SIZE_16); + + if (num_sw_actions > max_hw_actions) { + mlx5dr_dbg(dmn, "Max number of actions %d exceeds limit %d\n", + num_sw_actions, max_hw_actions); + return -EINVAL; + } + + hw_actions = kcalloc(1, max_hw_actions * DR_MODIFY_ACTION_SIZE, GFP_KERNEL); + if (!hw_actions) + return -ENOMEM; + + ret = dr_actions_convert_modify_header(action, + max_hw_actions, + num_sw_actions, + actions, + hw_actions, + &num_hw_actions, + &modify_ttl); + if (ret) + goto free_hw_actions; + + action->rewrite->modify_ttl = modify_ttl; + action->rewrite->data = (u8 *)hw_actions; + action->rewrite->num_of_actions = num_hw_actions; + + if (num_hw_actions == 1 && + dmn->info.caps.sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) { + action->rewrite->single_action_opt = true; + } else { + action->rewrite->single_action_opt = false; + ret = mlx5dr_ste_alloc_modify_hdr(action); + if (ret) + goto free_hw_actions; + } + + return 0; + +free_hw_actions: + kfree(hw_actions); + return ret; +} + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn, + u32 flags, + size_t actions_sz, + __be64 actions[]) +{ + struct mlx5dr_action *action; + int ret = 0; + + refcount_inc(&dmn->refcount); + + if (actions_sz % DR_MODIFY_ACTION_SIZE) { + mlx5dr_dbg(dmn, "Invalid modify actions size provided\n"); + goto dec_ref; + } + + action = dr_action_create_generic(DR_ACTION_TYP_MODIFY_HDR); + if (!action) + goto dec_ref; + + action->rewrite->dmn = dmn; + + ret = dr_action_create_modify_action(dmn, + actions_sz, + actions, + action); + if (ret) { + mlx5dr_dbg(dmn, "Failed creating modify header action %d\n", ret); + goto free_action; + } + + return action; + +free_action: + kfree(action); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, + u16 vport, u8 vhca_id_valid, + u16 vhca_id) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *vport_dmn; + struct mlx5dr_action *action; + u8 peer_vport; + + peer_vport = vhca_id_valid && mlx5_core_is_pf(dmn->mdev) && + (vhca_id != dmn->info.caps.gvmi); + vport_dmn = peer_vport ? xa_load(&dmn->peer_dmn_xa, vhca_id) : dmn; + if (!vport_dmn) { + mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); + return NULL; + } + + if (vport_dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_dbg(dmn, "Domain doesn't support vport actions\n"); + return NULL; + } + + vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, vport); + if (!vport_cap) { + mlx5dr_err(dmn, + "Failed to get vport 0x%x caps - vport is disabled or invalid\n", + vport); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_VPORT); + if (!action) + return NULL; + + action->vport->dmn = vport_dmn; + action->vport->caps = vport_cap; + + return action; +} + +struct mlx5dr_action * +mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id, + u8 dest_reg_id, u8 aso_type, + u8 init_color, u8 meter_id) +{ + struct mlx5dr_action *action; + + if (aso_type != MLX5_EXE_ASO_FLOW_METER) + return NULL; + + if (init_color > MLX5_FLOW_METER_COLOR_UNDEFINED) + return NULL; + + action = dr_action_create_generic(DR_ACTION_TYP_ASO_FLOW_METER); + if (!action) + return NULL; + + action->aso->obj_id = obj_id; + action->aso->offset = meter_id; + action->aso->dest_reg_id = dest_reg_id; + action->aso->init_color = init_color; + action->aso->dmn = dmn; + + refcount_inc(&dmn->refcount); + + return action; +} + +u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action) +{ + return action->reformat->id; +} + +int mlx5dr_action_destroy(struct mlx5dr_action *action) +{ + if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1)) + return -EBUSY; + + switch (action->action_type) { + case DR_ACTION_TYP_FT: + if (action->dest_tbl->is_fw_tbl) + refcount_dec(&action->dest_tbl->fw_tbl.dmn->refcount); + else + refcount_dec(&action->dest_tbl->tbl->refcount); + + if (action->dest_tbl->is_fw_tbl && + action->dest_tbl->fw_tbl.num_of_ref_actions) { + struct mlx5dr_action **ref_actions; + int i; + + ref_actions = action->dest_tbl->fw_tbl.ref_actions; + for (i = 0; i < action->dest_tbl->fw_tbl.num_of_ref_actions; i++) + refcount_dec(&ref_actions[i]->refcount); + + kfree(ref_actions); + + mlx5dr_fw_destroy_md_tbl(action->dest_tbl->fw_tbl.dmn, + action->dest_tbl->fw_tbl.id, + action->dest_tbl->fw_tbl.group_id); + } + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + case DR_ACTION_TYP_REMOVE_HDR: + refcount_dec(&action->reformat->dmn->refcount); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + mlx5dr_ste_free_modify_hdr(action); + kfree(action->rewrite->data); + refcount_dec(&action->rewrite->dmn->refcount); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + case DR_ACTION_TYP_L2_TO_TNL_L3: + case DR_ACTION_TYP_INSERT_HDR: + mlx5dr_cmd_destroy_reformat_ctx((action->reformat->dmn)->mdev, + action->reformat->id); + refcount_dec(&action->reformat->dmn->refcount); + break; + case DR_ACTION_TYP_MODIFY_HDR: + if (!action->rewrite->single_action_opt) + mlx5dr_ste_free_modify_hdr(action); + kfree(action->rewrite->data); + refcount_dec(&action->rewrite->dmn->refcount); + break; + case DR_ACTION_TYP_SAMPLER: + refcount_dec(&action->sampler->dmn->refcount); + break; + case DR_ACTION_TYP_ASO_FLOW_METER: + refcount_dec(&action->aso->dmn->refcount); + break; + case DR_ACTION_TYP_RANGE: + dr_action_destroy_range_definer(action); + mlx5dr_action_destroy(action->range->miss_tbl_action); + mlx5dr_action_destroy(action->range->hit_tbl_action); + break; + default: + break; + } + + kfree(action); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c new file mode 100644 index 0000000000..01ed644209 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_arg.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "dr_types.h" + +#define DR_ICM_MODIFY_HDR_GRANULARITY_4K 12 + +/* modify-header arg pool */ +enum dr_arg_chunk_size { + DR_ARG_CHUNK_SIZE_1, + DR_ARG_CHUNK_SIZE_MIN = DR_ARG_CHUNK_SIZE_1, /* keep updated when changing */ + DR_ARG_CHUNK_SIZE_2, + DR_ARG_CHUNK_SIZE_3, + DR_ARG_CHUNK_SIZE_4, + DR_ARG_CHUNK_SIZE_MAX, +}; + +/* argument pool area */ +struct dr_arg_pool { + enum dr_arg_chunk_size log_chunk_size; + struct mlx5dr_domain *dmn; + struct list_head free_list; + struct mutex mutex; /* protect arg pool */ +}; + +struct mlx5dr_arg_mgr { + struct mlx5dr_domain *dmn; + struct dr_arg_pool *pools[DR_ARG_CHUNK_SIZE_MAX]; +}; + +static int dr_arg_pool_alloc_objs(struct dr_arg_pool *pool) +{ + struct mlx5dr_arg_obj *arg_obj, *tmp_arg; + struct list_head cur_list; + u16 object_range; + int num_of_objects; + u32 obj_id = 0; + int i, ret; + + INIT_LIST_HEAD(&cur_list); + + object_range = + pool->dmn->info.caps.log_header_modify_argument_granularity; + + object_range = + max_t(u32, pool->dmn->info.caps.log_header_modify_argument_granularity, + DR_ICM_MODIFY_HDR_GRANULARITY_4K); + object_range = + min_t(u32, pool->dmn->info.caps.log_header_modify_argument_max_alloc, + object_range); + + if (pool->log_chunk_size > object_range) { + mlx5dr_err(pool->dmn, "Required chunk size (%d) is not supported\n", + pool->log_chunk_size); + return -ENOMEM; + } + + num_of_objects = (1 << (object_range - pool->log_chunk_size)); + /* Only one devx object per range */ + ret = mlx5dr_cmd_create_modify_header_arg(pool->dmn->mdev, + object_range, + pool->dmn->pdn, + &obj_id); + if (ret) { + mlx5dr_err(pool->dmn, "failed allocating object with range: %d:\n", + object_range); + return -EAGAIN; + } + + for (i = 0; i < num_of_objects; i++) { + arg_obj = kzalloc(sizeof(*arg_obj), GFP_KERNEL); + if (!arg_obj) { + ret = -ENOMEM; + goto clean_arg_obj; + } + + arg_obj->log_chunk_size = pool->log_chunk_size; + + list_add_tail(&arg_obj->list_node, &cur_list); + + arg_obj->obj_id = obj_id; + arg_obj->obj_offset = i * (1 << pool->log_chunk_size); + } + list_splice_tail_init(&cur_list, &pool->free_list); + + return 0; + +clean_arg_obj: + mlx5dr_cmd_destroy_modify_header_arg(pool->dmn->mdev, obj_id); + list_for_each_entry_safe(arg_obj, tmp_arg, &cur_list, list_node) { + list_del(&arg_obj->list_node); + kfree(arg_obj); + } + return ret; +} + +static struct mlx5dr_arg_obj *dr_arg_pool_get_arg_obj(struct dr_arg_pool *pool) +{ + struct mlx5dr_arg_obj *arg_obj = NULL; + int ret; + + mutex_lock(&pool->mutex); + if (list_empty(&pool->free_list)) { + ret = dr_arg_pool_alloc_objs(pool); + if (ret) + goto out; + } + + arg_obj = list_first_entry_or_null(&pool->free_list, + struct mlx5dr_arg_obj, + list_node); + WARN(!arg_obj, "couldn't get dr arg obj from pool"); + + if (arg_obj) + list_del_init(&arg_obj->list_node); + +out: + mutex_unlock(&pool->mutex); + return arg_obj; +} + +static void dr_arg_pool_put_arg_obj(struct dr_arg_pool *pool, + struct mlx5dr_arg_obj *arg_obj) +{ + mutex_lock(&pool->mutex); + list_add(&arg_obj->list_node, &pool->free_list); + mutex_unlock(&pool->mutex); +} + +static struct dr_arg_pool *dr_arg_pool_create(struct mlx5dr_domain *dmn, + enum dr_arg_chunk_size chunk_size) +{ + struct dr_arg_pool *pool; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->dmn = dmn; + + INIT_LIST_HEAD(&pool->free_list); + mutex_init(&pool->mutex); + + pool->log_chunk_size = chunk_size; + if (dr_arg_pool_alloc_objs(pool)) + goto free_pool; + + return pool; + +free_pool: + kfree(pool); + + return NULL; +} + +static void dr_arg_pool_destroy(struct dr_arg_pool *pool) +{ + struct mlx5dr_arg_obj *arg_obj, *tmp_arg; + + list_for_each_entry_safe(arg_obj, tmp_arg, &pool->free_list, list_node) { + list_del(&arg_obj->list_node); + if (!arg_obj->obj_offset) /* the first in range */ + mlx5dr_cmd_destroy_modify_header_arg(pool->dmn->mdev, arg_obj->obj_id); + kfree(arg_obj); + } + + mutex_destroy(&pool->mutex); + kfree(pool); +} + +static enum dr_arg_chunk_size dr_arg_get_chunk_size(u16 num_of_actions) +{ + if (num_of_actions <= 8) + return DR_ARG_CHUNK_SIZE_1; + if (num_of_actions <= 16) + return DR_ARG_CHUNK_SIZE_2; + if (num_of_actions <= 32) + return DR_ARG_CHUNK_SIZE_3; + if (num_of_actions <= 64) + return DR_ARG_CHUNK_SIZE_4; + + return DR_ARG_CHUNK_SIZE_MAX; +} + +u32 mlx5dr_arg_get_obj_id(struct mlx5dr_arg_obj *arg_obj) +{ + return (arg_obj->obj_id + arg_obj->obj_offset); +} + +struct mlx5dr_arg_obj *mlx5dr_arg_get_obj(struct mlx5dr_arg_mgr *mgr, + u16 num_of_actions, + u8 *data) +{ + u32 size = dr_arg_get_chunk_size(num_of_actions); + struct mlx5dr_arg_obj *arg_obj; + int ret; + + if (size >= DR_ARG_CHUNK_SIZE_MAX) + return NULL; + + arg_obj = dr_arg_pool_get_arg_obj(mgr->pools[size]); + if (!arg_obj) { + mlx5dr_err(mgr->dmn, "Failed allocating args object for modify header\n"); + return NULL; + } + + /* write it into the hw */ + ret = mlx5dr_send_postsend_args(mgr->dmn, + mlx5dr_arg_get_obj_id(arg_obj), + num_of_actions, data); + if (ret) { + mlx5dr_err(mgr->dmn, "Failed writing args object\n"); + goto put_obj; + } + + return arg_obj; + +put_obj: + mlx5dr_arg_put_obj(mgr, arg_obj); + return NULL; +} + +void mlx5dr_arg_put_obj(struct mlx5dr_arg_mgr *mgr, + struct mlx5dr_arg_obj *arg_obj) +{ + dr_arg_pool_put_arg_obj(mgr->pools[arg_obj->log_chunk_size], arg_obj); +} + +struct mlx5dr_arg_mgr* +mlx5dr_arg_mgr_create(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_arg_mgr *pool_mgr; + int i; + + if (!mlx5dr_domain_is_support_ptrn_arg(dmn)) + return NULL; + + pool_mgr = kzalloc(sizeof(*pool_mgr), GFP_KERNEL); + if (!pool_mgr) + return NULL; + + pool_mgr->dmn = dmn; + + for (i = 0; i < DR_ARG_CHUNK_SIZE_MAX; i++) { + pool_mgr->pools[i] = dr_arg_pool_create(dmn, i); + if (!pool_mgr->pools[i]) + goto clean_pools; + } + + return pool_mgr; + +clean_pools: + for (i--; i >= 0; i--) + dr_arg_pool_destroy(pool_mgr->pools[i]); + + kfree(pool_mgr); + return NULL; +} + +void mlx5dr_arg_mgr_destroy(struct mlx5dr_arg_mgr *mgr) +{ + struct dr_arg_pool **pools; + int i; + + if (!mgr) + return; + + pools = mgr->pools; + for (i = 0; i < DR_ARG_CHUNK_SIZE_MAX; i++) + dr_arg_pool_destroy(pools[i]); + + kfree(mgr); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c new file mode 100644 index 0000000000..fe228d948b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 - 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006 - 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. + */ + +#include "dr_types.h" + +int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int max_order) +{ + int i; + + buddy->max_order = max_order; + + INIT_LIST_HEAD(&buddy->list_node); + + buddy->bitmap = kcalloc(buddy->max_order + 1, + sizeof(*buddy->bitmap), + GFP_KERNEL); + buddy->num_free = kcalloc(buddy->max_order + 1, + sizeof(*buddy->num_free), + GFP_KERNEL); + + if (!buddy->bitmap || !buddy->num_free) + goto err_free_all; + + /* Allocating max_order bitmaps, one for each order */ + + for (i = 0; i <= buddy->max_order; ++i) { + unsigned int size = 1 << (buddy->max_order - i); + + buddy->bitmap[i] = bitmap_zalloc(size, GFP_KERNEL); + if (!buddy->bitmap[i]) + goto err_out_free_each_bit_per_order; + } + + /* In the beginning, we have only one order that is available for + * use (the biggest one), so mark the first bit in both bitmaps. + */ + + bitmap_set(buddy->bitmap[buddy->max_order], 0, 1); + + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free_each_bit_per_order: + for (i = 0; i <= buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + +err_free_all: + kfree(buddy->num_free); + kfree(buddy->bitmap); + return -ENOMEM; +} + +void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy) +{ + int i; + + list_del(&buddy->list_node); + + for (i = 0; i <= buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + + kfree(buddy->num_free); + kfree(buddy->bitmap); +} + +static int dr_buddy_find_free_seg(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int start_order, + unsigned int *segment, + unsigned int *order) +{ + unsigned int seg, order_iter, m; + + for (order_iter = start_order; + order_iter <= buddy->max_order; ++order_iter) { + if (!buddy->num_free[order_iter]) + continue; + + m = 1 << (buddy->max_order - order_iter); + seg = find_first_bit(buddy->bitmap[order_iter], m); + + if (WARN(seg >= m, + "ICM Buddy: failed finding free mem for order %d\n", + order_iter)) + return -ENOMEM; + + break; + } + + if (order_iter > buddy->max_order) + return -ENOMEM; + + *segment = seg; + *order = order_iter; + return 0; +} + +/** + * mlx5dr_buddy_alloc_mem() - Update second level bitmap. + * @buddy: Buddy to update. + * @order: Order of the buddy to update. + * @segment: Segment number. + * + * This function finds the first area of the ICM memory managed by this buddy. + * It uses the data structures of the buddy system in order to find the first + * area of free place, starting from the current order till the maximum order + * in the system. + * + * Return: 0 when segment is set, non-zero error status otherwise. + * + * The function returns the location (segment) in the whole buddy ICM memory + * area - the index of the memory segment that is available for use. + */ +int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int order, + unsigned int *segment) +{ + unsigned int seg, order_iter; + int err; + + err = dr_buddy_find_free_seg(buddy, order, &seg, &order_iter); + if (err) + return err; + + bitmap_clear(buddy->bitmap[order_iter], seg, 1); + --buddy->num_free[order_iter]; + + /* If we found free memory in some order that is bigger than the + * required order, we need to split every order between the required + * order and the order that we found into two parts, and mark accordingly. + */ + while (order_iter > order) { + --order_iter; + seg <<= 1; + bitmap_set(buddy->bitmap[order_iter], seg ^ 1, 1); + ++buddy->num_free[order_iter]; + } + + seg <<= order; + *segment = seg; + + return 0; +} + +void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int seg, unsigned int order) +{ + seg >>= order; + + /* Whenever a segment is free, + * the mem is added to the buddy that gave it. + */ + while (test_bit(seg ^ 1, buddy->bitmap[order])) { + bitmap_clear(buddy->bitmap[order], seg ^ 1, 1); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + bitmap_set(buddy->bitmap[order], seg, 1); + + ++buddy->num_free[order]; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c new file mode 100644 index 0000000000..8c2a34a0d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -0,0 +1,970 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, + u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + int err; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport); + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number); + + err = mlx5_cmd_exec_inout(mdev, query_esw_vport_context, in, out); + if (err) + return err; + + *icm_address_rx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_rx); + *icm_address_tx = + MLX5_GET64(query_esw_vport_context_out, out, + esw_vport_context.sw_steering_vport_icm_address_tx); + return 0; +} + +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, + u16 vport_number, u16 *gvmi) +{ + bool ec_vf_func = other_vport ? mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; + u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; + int out_size; + void *out; + int err; + + out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); + out = kzalloc(out_size, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, other_function, other_vport); + MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | + HCA_CAP_OPMOD_GET_CUR); + + err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + if (err) { + kfree(out); + return err; + } + + *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); + + kfree(out); + return 0; +} + +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps) +{ + caps->drop_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_rx); + caps->drop_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_fdb_action_drop_icm_address_tx); + caps->uplink_icm_address_rx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_rx); + caps->uplink_icm_address_tx = + MLX5_CAP64_ESW_FLOWTABLE(mdev, + sw_steering_uplink_icm_address_tx); + caps->sw_owner_v2 = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner_v2); + if (!caps->sw_owner_v2) + caps->sw_owner = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner); + + return 0; +} + +static int dr_cmd_query_nic_vport_roce_en(struct mlx5_core_dev *mdev, + u16 vport, bool *roce_en) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; + int err; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, !!vport); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *roce_en = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.roce_en); + return 0; +} + +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps) +{ + bool roce_en; + int err; + + caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required); + caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); + caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); + caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); + caps->roce_caps.fl_rc_qp_when_roce_disabled = + MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled); + + if (MLX5_CAP_GEN(mdev, roce)) { + err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en); + if (err) + return err; + + caps->roce_caps.roce_en = roce_en; + caps->roce_caps.fl_rc_qp_when_roce_disabled |= + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled); + caps->roce_caps.fl_rc_qp_when_roce_enabled = + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled); + } + + caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new); + + caps->support_modify_argument = + MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT; + + if (caps->support_modify_argument) { + caps->log_header_modify_argument_granularity = + MLX5_CAP_GEN(mdev, log_header_modify_argument_granularity); + caps->log_header_modify_argument_max_alloc = + MLX5_CAP_GEN(mdev, log_header_modify_argument_max_alloc); + } + + /* geneve_tlv_option_0_exist is the indication of + * STE support for lookup type flex_parser_ok + */ + caps->flex_parser_ok_bits_supp = + MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.ft_field_support.geneve_tlv_option_0_exist); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) { + caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); + caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); + } + + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED) { + caps->flex_parser_id_icmpv6_dw0 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0); + caps->flex_parser_id_icmpv6_dw1 = + MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1); + } + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED) + caps->flex_parser_id_geneve_tlv_option_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_geneve_tlv_option_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED) + caps->flex_parser_id_mpls_over_gre = + MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) + caps->flex_parser_id_mpls_over_udp = + MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED) + caps->flex_parser_id_gtpu_dw_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED) + caps->flex_parser_id_gtpu_teid = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_teid); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED) + caps->flex_parser_id_gtpu_dw_2 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_2); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED) + caps->flex_parser_id_gtpu_first_ext_dw_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_first_ext_dw_0); + + caps->nic_rx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address); + caps->nic_tx_drop_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_drop_icm_address); + caps->nic_tx_allow_address = + MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address); + + caps->rx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner_v2); + caps->tx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner_v2); + + if (!caps->rx_sw_owner_v2) + caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner); + if (!caps->tx_sw_owner_v2) + caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner); + + caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level); + + caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size); + caps->hdr_modify_icm_addr = + MLX5_CAP64_DEV_MEM(mdev, header_modify_sw_icm_start_address); + + caps->log_modify_pattern_icm_size = + MLX5_CAP_DEV_MEM(mdev, log_header_modify_pattern_sw_icm_size); + + caps->hdr_modify_pattern_icm_addr = + MLX5_CAP64_DEV_MEM(mdev, header_modify_pattern_sw_icm_start_address); + + caps->roce_min_src_udp = MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port); + + caps->is_ecpf = mlx5_core_is_ecpf_esw_manager(mdev); + + return 0; +} + +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 table_id, + struct mlx5dr_cmd_query_flow_table_details *output) +{ + u32 out[MLX5_ST_SZ_DW(query_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_flow_table_in)] = {}; + int err; + + MLX5_SET(query_flow_table_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_TABLE); + + MLX5_SET(query_flow_table_in, in, table_type, type); + MLX5_SET(query_flow_table_in, in, table_id, table_id); + + err = mlx5_cmd_exec_inout(dev, query_flow_table, in, out); + if (err) + return err; + + output->status = MLX5_GET(query_flow_table_out, out, status); + output->level = MLX5_GET(query_flow_table_out, out, flow_table_context.level); + + output->sw_owner_icm_root_1 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_1); + output->sw_owner_icm_root_0 = MLX5_GET64(query_flow_table_out, out, + flow_table_context.sw_owner_icm_root_0); + + return 0; +} + +int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev, + u32 sampler_id, + u64 *rx_icm_addr, + u64 *tx_icm_addr) +{ + u32 out[MLX5_ST_SZ_DW(query_sampler_obj_out)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + void *attr; + int ret; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_SAMPLER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + attr = MLX5_ADDR_OF(query_sampler_obj_out, out, sampler_object); + + *rx_icm_addr = MLX5_GET64(sampler_obj, attr, + sw_steering_icm_address_rx); + *tx_icm_addr = MLX5_GET64(sampler_obj, attr, + sw_steering_icm_address_tx); + + return 0; +} + +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev) +{ + u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {}; + + /* Skip SYNC in case the device is internal error state. + * Besides a device error, this also happens when we're + * in fast teardown + */ + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return 0; + + MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING); + + return mlx5_cmd_exec_in(mdev, sync_steering, in); +} + +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context; + unsigned int inlen; + void *in_dests; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + + 1 * MLX5_ST_SZ_BYTES(dest_format_struct); /* One destination only */ + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, table_type, table_type); + MLX5_SET(set_fte_in, in, table_id, table_id); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, modify_header_id); + MLX5_SET(flow_context, in_flow_context, destination_list_size, 1); + MLX5_SET(flow_context, in_flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + MLX5_SET(dest_format_struct, in_dests, destination_type, + MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT); + MLX5_SET(dest_format_struct, in_dests, destination_id, vport); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kvfree(in); + + return err; +} + +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id) +{ + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {}; + + MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + MLX5_SET(delete_fte_in, in, table_type, table_type); + MLX5_SET(delete_fte_in, in, table_id, table_id); + + return mlx5_cmd_exec_in(mdev, delete_fte, in); +} + +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 num_of_actions, + u64 *actions, + u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {}; + void *p_actions; + u32 inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + + num_of_actions * sizeof(u64); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_of_actions); + p_actions = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(p_actions, actions, num_of_actions * sizeof(u64)); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto out; + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); +out: + kvfree(in); + return err; +} + +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {}; + + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + return mlx5_cmd_exec_in(mdev, dealloc_modify_header_context, in); +} + +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET(create_flow_group_in, in, table_type, table_type); + MLX5_SET(create_flow_group_in, in, table_id, table_id); + + err = mlx5_cmd_exec_inout(mdev, create_flow_group, in, out); + if (err) + goto out; + + *group_id = MLX5_GET(create_flow_group_out, out, group_id); + +out: + kvfree(in); + return err; +} + +int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {}; + + MLX5_SET(destroy_flow_group_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET(destroy_flow_group_in, in, table_type, table_type); + MLX5_SET(destroy_flow_group_in, in, table_id, table_id); + MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + + return mlx5_cmd_exec_in(mdev, destroy_flow_group, in); +} + +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_create_flow_table_attr *attr, + u64 *fdb_rx_icm_addr, + u32 *table_id) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {}; + void *ft_mdev; + int err; + + MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); + MLX5_SET(create_flow_table_in, in, table_type, attr->table_type); + MLX5_SET(create_flow_table_in, in, uid, attr->uid); + + ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); + MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl); + MLX5_SET(flow_table_context, ft_mdev, sw_owner, attr->sw_owner); + MLX5_SET(flow_table_context, ft_mdev, level, attr->level); + + if (attr->sw_owner) { + /* icm_addr_0 used for FDB RX / NIC TX / NIC_RX + * icm_addr_1 used for FDB TX + */ + if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_rx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_tx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB) { + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_0, attr->icm_addr_rx); + MLX5_SET64(flow_table_context, ft_mdev, + sw_owner_icm_root_1, attr->icm_addr_tx); + } + } + + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + attr->decap_en); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + attr->reformat_en); + + err = mlx5_cmd_exec_inout(mdev, create_flow_table, in, out); + if (err) + return err; + + *table_id = MLX5_GET(create_flow_table_out, out, table_id); + if (!attr->sw_owner && attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB && + fdb_rx_icm_addr) + *fdb_rx_icm_addr = + (u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) | + (u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 | + (u64)MLX5_GET(create_flow_table_out, out, icm_address_63_40) << 40; + + return 0; +} + +int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, + u32 table_id, + u32 table_type) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {}; + + MLX5_SET(destroy_flow_table_in, in, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + MLX5_SET(destroy_flow_table_in, in, table_type, table_type); + MLX5_SET(destroy_flow_table_in, in, table_id, table_id); + + return mlx5_cmd_exec_in(mdev, destroy_flow_table, in); +} + +int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + enum mlx5_reformat_ctx_type rt, + u8 reformat_param_0, + u8 reformat_param_1, + size_t reformat_size, + void *reformat_data, + u32 *reformat_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {}; + size_t inlen, cmd_data_sz, cmd_total_sz; + void *prctx; + void *pdata; + void *in; + int err; + + cmd_total_sz = MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in); + cmd_data_sz = MLX5_FLD_SZ_BYTES(alloc_packet_reformat_context_in, + packet_reformat_context.reformat_data); + inlen = ALIGN(cmd_total_sz + reformat_size - cmd_data_sz, 4); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + + prctx = MLX5_ADDR_OF(alloc_packet_reformat_context_in, in, packet_reformat_context); + pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data); + + MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt); + MLX5_SET(packet_reformat_context_in, prctx, reformat_param_0, reformat_param_0); + MLX5_SET(packet_reformat_context_in, prctx, reformat_param_1, reformat_param_1); + MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size); + if (reformat_data && reformat_size) + memcpy(pdata, reformat_data, reformat_size); + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) + goto err_free_in; + + *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); + +err_free_in: + kvfree(in); + return err; +} + +void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, + u32 reformat_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {}; + + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + reformat_id); + + mlx5_cmd_exec_in(mdev, dealloc_packet_reformat_context, in); +} + +static void dr_cmd_set_definer_format(void *ptr, u16 format_id, + u8 *dw_selectors, + u8 *byte_selectors) +{ + if (format_id != MLX5_IFC_DEFINER_FORMAT_ID_SELECT) + return; + + MLX5_SET(match_definer, ptr, format_select_dw0, dw_selectors[0]); + MLX5_SET(match_definer, ptr, format_select_dw1, dw_selectors[1]); + MLX5_SET(match_definer, ptr, format_select_dw2, dw_selectors[2]); + MLX5_SET(match_definer, ptr, format_select_dw3, dw_selectors[3]); + MLX5_SET(match_definer, ptr, format_select_dw4, dw_selectors[4]); + MLX5_SET(match_definer, ptr, format_select_dw5, dw_selectors[5]); + MLX5_SET(match_definer, ptr, format_select_dw6, dw_selectors[6]); + MLX5_SET(match_definer, ptr, format_select_dw7, dw_selectors[7]); + MLX5_SET(match_definer, ptr, format_select_dw8, dw_selectors[8]); + + MLX5_SET(match_definer, ptr, format_select_byte0, byte_selectors[0]); + MLX5_SET(match_definer, ptr, format_select_byte1, byte_selectors[1]); + MLX5_SET(match_definer, ptr, format_select_byte2, byte_selectors[2]); + MLX5_SET(match_definer, ptr, format_select_byte3, byte_selectors[3]); + MLX5_SET(match_definer, ptr, format_select_byte4, byte_selectors[4]); + MLX5_SET(match_definer, ptr, format_select_byte5, byte_selectors[5]); + MLX5_SET(match_definer, ptr, format_select_byte6, byte_selectors[6]); + MLX5_SET(match_definer, ptr, format_select_byte7, byte_selectors[7]); +} + +int mlx5dr_cmd_create_definer(struct mlx5_core_dev *mdev, + u16 format_id, + u8 *dw_selectors, + u8 *byte_selectors, + u8 *match_mask, + u32 *definer_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {}; + void *ptr; + int err; + + ptr = MLX5_ADDR_OF(create_match_definer_in, in, + general_obj_in_cmd_hdr); + MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type, + MLX5_OBJ_TYPE_MATCH_DEFINER); + + ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context); + MLX5_SET(match_definer, ptr, format_id, format_id); + + dr_cmd_set_definer_format(ptr, format_id, + dw_selectors, byte_selectors); + + ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask); + memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask)); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *definer_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} + +void +mlx5dr_cmd_destroy_definer(struct mlx5_core_dev *mdev, u32 definer_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_MATCH_DEFINER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, + u16 index, struct mlx5dr_cmd_gid_attr *attr) +{ + u32 out[MLX5_ST_SZ_DW(query_roce_address_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_roce_address_in)] = {}; + int err; + + MLX5_SET(query_roce_address_in, in, opcode, + MLX5_CMD_OP_QUERY_ROCE_ADDRESS); + + MLX5_SET(query_roce_address_in, in, roce_address_index, index); + MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num); + + err = mlx5_cmd_exec_inout(mdev, query_roce_address, in, out); + if (err) + return err; + + memcpy(&attr->gid, + MLX5_ADDR_OF(query_roce_address_out, + out, roce_address.source_l3_address), + sizeof(attr->gid)); + memcpy(attr->mac, + MLX5_ADDR_OF(query_roce_address_out, out, + roce_address.source_mac_47_32), + sizeof(attr->mac)); + + if (MLX5_GET(query_roce_address_out, out, + roce_address.roce_version) == MLX5_ROCE_VERSION_2) + attr->roce_ver = MLX5_ROCE_VERSION_2; + else + attr->roce_ver = MLX5_ROCE_VERSION_1; + + return 0; +} + +int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev, + u16 log_obj_range, u32 pd, + u32 *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_modify_header_arg_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + void *attr; + int ret; + + attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, attr, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, attr, obj_type, + MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT); + MLX5_SET(general_obj_in_cmd_hdr, attr, + op_param.create.log_obj_range, log_obj_range); + + attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, arg); + MLX5_SET(modify_header_arg, attr, access_pd, pd); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return 0; +} + +void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev, + u32 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev, + struct mlx5dr_cmd_fte_info *fte, + bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + int num_encap = 0; + int i; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER || + fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_NONE) + continue; + if ((fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && + fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context, *vlan; + bool extended_dest = false; + void *in_match_value; + unsigned int inlen; + int dst_cnt_size; + void *in_dests; + u32 *in; + int err; + int i; + + if (mlx5dr_cmd_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + MLX5_SET(set_fte_in, in, ignore_flow_level, fte->ignore_flow_level); + if (ft->vport) { + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, 1); + } + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); + } + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio); + + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, fte->val, sizeof(u32) * MLX5_ST_SZ_DW_MATCH_PARAM); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + enum mlx5_flow_destination_type type = fte->dest_arr[i].type; + enum mlx5_ifc_flow_destination_type ifc_type; + unsigned int id; + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_NONE: + continue; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = fte->dest_arr[i].ft_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + id = fte->dest_arr[i].ft_id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + if (type == MLX5_FLOW_DESTINATION_TYPE_VPORT) { + id = fte->dest_arr[i].vport.num; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_VPORT; + } else { + id = 0; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_UPLINK; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, 1); + } + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + fte->dest_arr[i].vport.vhca_id); + if (extended_dest && (fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + fte->dest_arr[i].vport.reformat_id); + } + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + id = fte->dest_arr[i].sampler_id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + break; + default: + id = fte->dest_arr[i].tir_num; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; + } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + ifc_type); + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += dst_cnt_size; + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + fte->dest_arr[i].counter_id); + in_dests += dst_cnt_size; + list_size++; + } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: + kvfree(in); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c new file mode 100644 index 0000000000..7e36e10621 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include +#include +#include +#include "dr_types.h" + +#define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL) + +enum dr_dump_rec_type { + DR_DUMP_REC_TYPE_DOMAIN = 3000, + DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER = 3001, + DR_DUMP_REC_TYPE_DOMAIN_INFO_DEV_ATTR = 3002, + DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT = 3003, + DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS = 3004, + DR_DUMP_REC_TYPE_DOMAIN_SEND_RING = 3005, + + DR_DUMP_REC_TYPE_TABLE = 3100, + DR_DUMP_REC_TYPE_TABLE_RX = 3101, + DR_DUMP_REC_TYPE_TABLE_TX = 3102, + + DR_DUMP_REC_TYPE_MATCHER = 3200, + DR_DUMP_REC_TYPE_MATCHER_MASK_DEPRECATED = 3201, + DR_DUMP_REC_TYPE_MATCHER_RX = 3202, + DR_DUMP_REC_TYPE_MATCHER_TX = 3203, + DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204, + DR_DUMP_REC_TYPE_MATCHER_MASK = 3205, + + DR_DUMP_REC_TYPE_RULE = 3300, + DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301, + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0 = 3302, + DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 = 3303, + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1 = 3304, + + DR_DUMP_REC_TYPE_ACTION_ENCAP_L2 = 3400, + DR_DUMP_REC_TYPE_ACTION_ENCAP_L3 = 3401, + DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR = 3402, + DR_DUMP_REC_TYPE_ACTION_DROP = 3403, + DR_DUMP_REC_TYPE_ACTION_QP = 3404, + DR_DUMP_REC_TYPE_ACTION_FT = 3405, + DR_DUMP_REC_TYPE_ACTION_CTR = 3406, + DR_DUMP_REC_TYPE_ACTION_TAG = 3407, + DR_DUMP_REC_TYPE_ACTION_VPORT = 3408, + DR_DUMP_REC_TYPE_ACTION_DECAP_L2 = 3409, + DR_DUMP_REC_TYPE_ACTION_DECAP_L3 = 3410, + DR_DUMP_REC_TYPE_ACTION_DEVX_TIR = 3411, + DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN = 3412, + DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413, + DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415, + DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420, + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421, + DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE = 3425, +}; + +void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->dump_info.dbg_mutex); + list_add_tail(&tbl->dbg_node, &tbl->dmn->dbg_tbl_list); + mutex_unlock(&tbl->dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->dump_info.dbg_mutex); + list_del(&tbl->dbg_node); + mutex_unlock(&tbl->dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mutex_lock(&dmn->dump_info.dbg_mutex); + list_add_tail(&rule->dbg_node, &rule->matcher->dbg_rule_list); + mutex_unlock(&dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mutex_lock(&dmn->dump_info.dbg_mutex); + list_del(&rule->dbg_node); + mutex_unlock(&dmn->dump_info.dbg_mutex); +} + +static u64 dr_dump_icm_to_idx(u64 icm_addr) +{ + return (icm_addr >> 6) & 0xffffffff; +} + +#define DR_HEX_SIZE 256 + +static void +dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size) +{ + if (WARN_ON_ONCE(DR_HEX_SIZE < 2 * size + 1)) + size = DR_HEX_SIZE / 2 - 1; /* truncate */ + + bin2hex(hex, src, size); + hex[2 * size] = 0; /* NULL-terminate */ +} + +static int +dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, + struct mlx5dr_rule_action_member *action_mem) +{ + struct mlx5dr_action *action = action_mem->action; + const u64 action_id = DR_DBG_PTR_TO_ID(action); + u64 hit_tbl_ptr, miss_tbl_ptr; + u32 hit_tbl_id, miss_tbl_id; + + switch (action->action_type) { + case DR_ACTION_TYP_DROP: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_DROP, action_id, rule_id); + break; + case DR_ACTION_TYP_FT: + if (action->dest_tbl->is_fw_tbl) + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_FT, action_id, + rule_id, action->dest_tbl->fw_tbl.id, + -1); + else + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_FT, action_id, + rule_id, action->dest_tbl->tbl->table_id, + DR_DBG_PTR_TO_ID(action->dest_tbl->tbl)); + + break; + case DR_ACTION_TYP_CTR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id, + action->ctr->ctr_id + action->ctr->offset); + break; + case DR_ACTION_TYP_TAG: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id, + action->flow_tag->flow_tag); + break; + case DR_ACTION_TYP_MODIFY_HDR: + { + struct mlx5dr_ptrn_obj *ptrn = action->rewrite->ptrn; + struct mlx5dr_arg_obj *arg = action->rewrite->arg; + u8 *rewrite_data = action->rewrite->data; + bool ptrn_arg; + int i; + + ptrn_arg = !action->rewrite->single_action_opt && ptrn && arg; + + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,%d,0x%x,0x%x,0x%x", + DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id, + rule_id, action->rewrite->index, + action->rewrite->single_action_opt, + ptrn_arg ? action->rewrite->num_of_actions : 0, + ptrn_arg ? ptrn->index : 0, + ptrn_arg ? mlx5dr_arg_get_obj_id(arg) : 0); + + if (ptrn_arg) { + for (i = 0; i < action->rewrite->num_of_actions; i++) { + seq_printf(file, ",0x%016llx", + be64_to_cpu(((__be64 *)rewrite_data)[i])); + } + } + + seq_puts(file, "\n"); + break; + } + case DR_ACTION_TYP_VPORT: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id, + action->vport->caps->num); + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id, + rule_id); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id, + rule_id, + (action->rewrite->ptrn && action->rewrite->arg) ? + mlx5dr_arg_get_obj_id(action->rewrite->arg) : + action->rewrite->index); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id, + rule_id, action->reformat->id); + break; + case DR_ACTION_TYP_L2_TO_TNL_L3: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id, + rule_id, action->reformat->id); + break; + case DR_ACTION_TYP_POP_VLAN: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id, + rule_id); + break; + case DR_ACTION_TYP_PUSH_VLAN: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id, + rule_id, action->push_vlan->vlan_hdr); + break; + case DR_ACTION_TYP_INSERT_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id, + rule_id, action->reformat->id, + action->reformat->param_0, + action->reformat->param_1); + break; + case DR_ACTION_TYP_REMOVE_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id, + rule_id, action->reformat->id, + action->reformat->param_0, + action->reformat->param_1); + break; + case DR_ACTION_TYP_SAMPLER: + seq_printf(file, + "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id, rule_id, + 0, 0, action->sampler->sampler_id, + action->sampler->rx_icm_addr, + action->sampler->tx_icm_addr); + break; + case DR_ACTION_TYP_RANGE: + if (action->range->hit_tbl_action->dest_tbl->is_fw_tbl) { + hit_tbl_id = action->range->hit_tbl_action->dest_tbl->fw_tbl.id; + hit_tbl_ptr = 0; + } else { + hit_tbl_id = action->range->hit_tbl_action->dest_tbl->tbl->table_id; + hit_tbl_ptr = + DR_DBG_PTR_TO_ID(action->range->hit_tbl_action->dest_tbl->tbl); + } + + if (action->range->miss_tbl_action->dest_tbl->is_fw_tbl) { + miss_tbl_id = action->range->miss_tbl_action->dest_tbl->fw_tbl.id; + miss_tbl_ptr = 0; + } else { + miss_tbl_id = action->range->miss_tbl_action->dest_tbl->tbl->table_id; + miss_tbl_ptr = + DR_DBG_PTR_TO_ID(action->range->miss_tbl_action->dest_tbl->tbl); + } + + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx,0x%x,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE, action_id, rule_id, + hit_tbl_id, hit_tbl_ptr, miss_tbl_id, miss_tbl_ptr, + action->range->definer_id); + break; + default: + return 0; + } + + return 0; +} + +static int +dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste, + bool is_rx, const u64 rule_id, u8 format_ver) +{ + char hw_ste_dump[DR_HEX_SIZE]; + u32 mem_rec_type; + + if (format_ver == MLX5_STEERING_FORMAT_CONNECTX_5) { + mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 : + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0; + } else { + mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 : + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1; + } + + dr_dump_hex_print(hw_ste_dump, (char *)mlx5dr_ste_get_hw_ste(ste), + DR_STE_SIZE_REDUCED); + + seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type, + dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id, + hw_ste_dump); + + return 0; +} + +static int +dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx, + bool is_rx, const u64 rule_id, u8 format_ver) +{ + struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES]; + struct mlx5dr_ste *curr_ste = rule_rx_tx->last_rule_ste; + int ret, i; + + if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i)) + return 0; + + while (i--) { + ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id, + format_ver); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + const u64 rule_id = DR_DBG_PTR_TO_ID(rule); + struct mlx5dr_rule_rx_tx *rx = &rule->rx; + struct mlx5dr_rule_rx_tx *tx = &rule->tx; + u8 format_ver; + int ret; + + format_ver = rule->matcher->tbl->dmn->info.caps.sw_format_ver; + + seq_printf(file, "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE, rule_id, + DR_DBG_PTR_TO_ID(rule->matcher)); + + if (rx->nic_matcher) { + ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver); + if (ret < 0) + return ret; + } + + if (tx->nic_matcher) { + ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver); + if (ret < 0) + return ret; + } + + list_for_each_entry(action_mem, &rule->rule_actions_list, list) { + ret = dr_dump_rule_action_mem(file, rule_id, action_mem); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask, + u8 criteria, const u64 matcher_id) +{ + char dump[DR_HEX_SIZE]; + + seq_printf(file, "%d,0x%llx,", DR_DUMP_REC_TYPE_MATCHER_MASK, + matcher_id); + + if (criteria & DR_MATCHER_CRITERIA_OUTER) { + dr_dump_hex_print(dump, (char *)&mask->outer, sizeof(mask->outer)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_INNER) { + dr_dump_hex_print(dump, (char *)&mask->inner, sizeof(mask->inner)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC) { + dr_dump_hex_print(dump, (char *)&mask->misc, sizeof(mask->misc)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC2) { + dr_dump_hex_print(dump, (char *)&mask->misc2, sizeof(mask->misc2)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC3) { + dr_dump_hex_print(dump, (char *)&mask->misc3, sizeof(mask->misc3)); + seq_printf(file, "%s\n", dump); + } else { + seq_puts(file, ",\n"); + } + + return 0; +} + +static int +dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder, + u32 index, bool is_rx, const u64 matcher_id) +{ + seq_printf(file, "%d,0x%llx,%d,%d,0x%x\n", + DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index, is_rx, + builder->lu_type); + + return 0; +} + +static int +dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx, + struct mlx5dr_matcher_rx_tx *matcher_rx_tx, + const u64 matcher_id) +{ + enum dr_dump_rec_type rec_type; + u64 s_icm_addr, e_icm_addr; + int i, ret; + + rec_type = is_rx ? DR_DUMP_REC_TYPE_MATCHER_RX : + DR_DUMP_REC_TYPE_MATCHER_TX; + + s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->s_htbl->chunk); + e_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->e_anchor->chunk); + seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n", + rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx), + matcher_id, matcher_rx_tx->num_of_builders, + dr_dump_icm_to_idx(s_icm_addr), + dr_dump_icm_to_idx(e_icm_addr)); + + for (i = 0; i < matcher_rx_tx->num_of_builders; i++) { + ret = dr_dump_matcher_builder(file, + &matcher_rx_tx->ste_builder[i], + i, is_rx, matcher_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_matcher_rx_tx *rx = &matcher->rx; + struct mlx5dr_matcher_rx_tx *tx = &matcher->tx; + u64 matcher_id; + int ret; + + matcher_id = DR_DBG_PTR_TO_ID(matcher); + + seq_printf(file, "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER, + matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl), matcher->prio); + + ret = dr_dump_matcher_mask(file, &matcher->mask, + matcher->match_criteria, matcher_id); + if (ret < 0) + return ret; + + if (rx->nic_tbl) { + ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id); + if (ret < 0) + return ret; + } + + if (tx->nic_tbl) { + ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_rule *rule; + int ret; + + ret = dr_dump_matcher(file, matcher); + if (ret < 0) + return ret; + + list_for_each_entry(rule, &matcher->dbg_rule_list, dbg_node) { + ret = dr_dump_rule(file, rule); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_table_rx_tx(struct seq_file *file, bool is_rx, + struct mlx5dr_table_rx_tx *table_rx_tx, + const u64 table_id) +{ + enum dr_dump_rec_type rec_type; + u64 s_icm_addr; + + rec_type = is_rx ? DR_DUMP_REC_TYPE_TABLE_RX : + DR_DUMP_REC_TYPE_TABLE_TX; + + s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(table_rx_tx->s_anchor->chunk); + seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id, + dr_dump_icm_to_idx(s_icm_addr)); + + return 0; +} + +static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table) +{ + struct mlx5dr_table_rx_tx *rx = &table->rx; + struct mlx5dr_table_rx_tx *tx = &table->tx; + int ret; + + seq_printf(file, "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE, + DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn), + table->table_type, table->level); + + if (rx->nic_dmn) { + ret = dr_dump_table_rx_tx(file, true, rx, + DR_DBG_PTR_TO_ID(table)); + if (ret < 0) + return ret; + } + + if (tx->nic_dmn) { + ret = dr_dump_table_rx_tx(file, false, tx, + DR_DBG_PTR_TO_ID(table)); + if (ret < 0) + return ret; + } + return 0; +} + +static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl) +{ + struct mlx5dr_matcher *matcher; + int ret; + + ret = dr_dump_table(file, tbl); + if (ret < 0) + return ret; + + list_for_each_entry(matcher, &tbl->matcher_list, list_node) { + ret = dr_dump_matcher_all(file, matcher); + if (ret < 0) + return ret; + } + return 0; +} + +static int +dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring, + const u64 domain_id) +{ + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_DOMAIN_SEND_RING, DR_DBG_PTR_TO_ID(ring), + domain_id, ring->cq->mcq.cqn, ring->qp->qpn); + return 0; +} + +static int +dr_dump_domain_info_flex_parser(struct seq_file *file, + const char *flex_parser_name, + const u8 flex_parser_value, + const u64 domain_id) +{ + seq_printf(file, "%d,0x%llx,%s,0x%x\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id, + flex_parser_name, flex_parser_value); + return 0; +} + +static int +dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps, + const u64 domain_id) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + unsigned long i, vports_num; + + xa_for_each(&caps->vports.vports_caps_xa, vports_num, vport_caps) + ; /* count the number of vports in xarray */ + + seq_printf(file, "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi, + caps->nic_rx_drop_address, caps->nic_tx_drop_address, + caps->flex_protocols, vports_num, caps->eswitch_manager); + + xa_for_each(&caps->vports.vports_caps_xa, i, vport_caps) { + vport_caps = xa_load(&caps->vports.vports_caps_xa, i); + + seq_printf(file, "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT, domain_id, i, + vport_caps->vport_gvmi, vport_caps->icm_address_rx, + vport_caps->icm_address_tx); + } + return 0; +} + +static int +dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info, + const u64 domain_id) +{ + int ret; + + ret = dr_dump_domain_info_caps(file, &info->caps, domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0", + info->caps.flex_parser_id_icmp_dw0, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1", + info->caps.flex_parser_id_icmp_dw1, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0", + info->caps.flex_parser_id_icmpv6_dw0, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1", + info->caps.flex_parser_id_icmpv6_dw1, + domain_id); + if (ret < 0) + return ret; + + return 0; +} + +static int +dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) +{ + u64 domain_id = DR_DBG_PTR_TO_ID(dmn); + int ret; + + seq_printf(file, "%d,0x%llx,%d,0%x,%d,%u.%u.%u,%s,%d,%u,%u,%u\n", + DR_DUMP_REC_TYPE_DOMAIN, + domain_id, dmn->type, dmn->info.caps.gvmi, + dmn->info.supp_sw_steering, + /* package version */ + LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, + LINUX_VERSION_SUBLEVEL, + pci_name(dmn->mdev->pdev), + 0, /* domain flags */ + dmn->num_buddies[DR_ICM_TYPE_STE], + dmn->num_buddies[DR_ICM_TYPE_MODIFY_ACTION], + dmn->num_buddies[DR_ICM_TYPE_MODIFY_HDR_PTRN]); + + ret = dr_dump_domain_info(file, &dmn->info, domain_id); + if (ret < 0) + return ret; + + if (dmn->info.supp_sw_steering) { + ret = dr_dump_send_ring(file, dmn->send_ring, domain_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) +{ + struct mlx5dr_table *tbl; + int ret; + + mutex_lock(&dmn->dump_info.dbg_mutex); + mlx5dr_domain_lock(dmn); + + ret = dr_dump_domain(file, dmn); + if (ret < 0) + goto unlock_mutex; + + list_for_each_entry(tbl, &dmn->dbg_tbl_list, dbg_node) { + ret = dr_dump_table_all(file, tbl); + if (ret < 0) + break; + } + +unlock_mutex: + mlx5dr_domain_unlock(dmn); + mutex_unlock(&dmn->dump_info.dbg_mutex); + return ret; +} + +static int dr_dump_show(struct seq_file *file, void *priv) +{ + return dr_dump_domain_all(file, file->private); +} +DEFINE_SHOW_ATTRIBUTE(dr_dump); + +void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn) +{ + struct mlx5_core_dev *dev = dmn->mdev; + char file_name[128]; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5_core_warn(dev, + "Steering dump is not supported for NIC RX/TX domains\n"); + return; + } + + dmn->dump_info.steering_debugfs = + debugfs_create_dir("steering", mlx5_debugfs_get_dev_root(dev)); + dmn->dump_info.fdb_debugfs = + debugfs_create_dir("fdb", dmn->dump_info.steering_debugfs); + + sprintf(file_name, "dmn_%p", dmn); + debugfs_create_file(file_name, 0444, dmn->dump_info.fdb_debugfs, + dmn, &dr_dump_fops); + + INIT_LIST_HEAD(&dmn->dbg_tbl_list); + mutex_init(&dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn) +{ + debugfs_remove_recursive(dmn->dump_info.steering_debugfs); + mutex_destroy(&dmn->dump_info.dbg_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h new file mode 100644 index 0000000000..def6cf853e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +struct mlx5dr_dbg_dump_info { + struct mutex dbg_mutex; /* protect dbg lists */ + struct dentry *steering_debugfs; + struct dentry *fdb_debugfs; +}; + +void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn); +void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn); +void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl); +void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl); +void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule); +void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c new file mode 100644 index 0000000000..d5ea977519 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "dr_types.h" +#include "dr_ste.h" + +struct dr_definer_object { + u32 id; + u16 format_id; + u8 dw_selectors[MLX5_IFC_DEFINER_DW_SELECTORS_NUM]; + u8 byte_selectors[MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM]; + u8 match_mask[DR_STE_SIZE_MATCH_TAG]; + refcount_t refcount; +}; + +static bool dr_definer_compare(struct dr_definer_object *definer, + u16 format_id, u8 *dw_selectors, + u8 *byte_selectors, u8 *match_mask) +{ + int i; + + if (definer->format_id != format_id) + return false; + + for (i = 0; i < MLX5_IFC_DEFINER_DW_SELECTORS_NUM; i++) + if (definer->dw_selectors[i] != dw_selectors[i]) + return false; + + for (i = 0; i < MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM; i++) + if (definer->byte_selectors[i] != byte_selectors[i]) + return false; + + if (memcmp(definer->match_mask, match_mask, DR_STE_SIZE_MATCH_TAG)) + return false; + + return true; +} + +static struct dr_definer_object * +dr_definer_find_obj(struct mlx5dr_domain *dmn, u16 format_id, + u8 *dw_selectors, u8 *byte_selectors, u8 *match_mask) +{ + struct dr_definer_object *definer_obj; + unsigned long id; + + xa_for_each(&dmn->definers_xa, id, definer_obj) { + if (dr_definer_compare(definer_obj, format_id, + dw_selectors, byte_selectors, + match_mask)) + return definer_obj; + } + + return NULL; +} + +static struct dr_definer_object * +dr_definer_create_obj(struct mlx5dr_domain *dmn, u16 format_id, + u8 *dw_selectors, u8 *byte_selectors, u8 *match_mask) +{ + struct dr_definer_object *definer_obj; + int ret = 0; + + definer_obj = kzalloc(sizeof(*definer_obj), GFP_KERNEL); + if (!definer_obj) + return NULL; + + ret = mlx5dr_cmd_create_definer(dmn->mdev, + format_id, + dw_selectors, + byte_selectors, + match_mask, + &definer_obj->id); + if (ret) + goto err_free_definer_obj; + + /* Definer ID can have 32 bits, but STE format + * supports only definers with 8 bit IDs. + */ + if (definer_obj->id > 0xff) { + mlx5dr_err(dmn, "Unsupported definer ID (%d)\n", definer_obj->id); + goto err_destroy_definer; + } + + definer_obj->format_id = format_id; + memcpy(definer_obj->dw_selectors, dw_selectors, sizeof(definer_obj->dw_selectors)); + memcpy(definer_obj->byte_selectors, byte_selectors, sizeof(definer_obj->byte_selectors)); + memcpy(definer_obj->match_mask, match_mask, sizeof(definer_obj->match_mask)); + + refcount_set(&definer_obj->refcount, 1); + + ret = xa_insert(&dmn->definers_xa, definer_obj->id, definer_obj, GFP_KERNEL); + if (ret) { + mlx5dr_dbg(dmn, "Couldn't insert new definer into xarray (%d)\n", ret); + goto err_destroy_definer; + } + + return definer_obj; + +err_destroy_definer: + mlx5dr_cmd_destroy_definer(dmn->mdev, definer_obj->id); +err_free_definer_obj: + kfree(definer_obj); + + return NULL; +} + +static void dr_definer_destroy_obj(struct mlx5dr_domain *dmn, + struct dr_definer_object *definer_obj) +{ + mlx5dr_cmd_destroy_definer(dmn->mdev, definer_obj->id); + xa_erase(&dmn->definers_xa, definer_obj->id); + kfree(definer_obj); +} + +int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id, + u8 *dw_selectors, u8 *byte_selectors, + u8 *match_mask, u32 *definer_id) +{ + struct dr_definer_object *definer_obj; + int ret = 0; + + definer_obj = dr_definer_find_obj(dmn, format_id, dw_selectors, + byte_selectors, match_mask); + if (!definer_obj) { + definer_obj = dr_definer_create_obj(dmn, format_id, + dw_selectors, byte_selectors, + match_mask); + if (!definer_obj) + return -ENOMEM; + } else { + refcount_inc(&definer_obj->refcount); + } + + *definer_id = definer_obj->id; + + return ret; +} + +void mlx5dr_definer_put(struct mlx5dr_domain *dmn, u32 definer_id) +{ + struct dr_definer_object *definer_obj; + + definer_obj = xa_load(&dmn->definers_xa, definer_id); + if (!definer_obj) { + mlx5dr_err(dmn, "Definer ID %d not found\n", definer_id); + return; + } + + if (refcount_dec_and_test(&definer_obj->refcount)) + dr_definer_destroy_obj(dmn, definer_obj); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c new file mode 100644 index 0000000000..3d74109f82 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include +#include "dr_types.h" + +#define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ + ((dmn)->info.caps.dmn_type##_sw_owner || \ + ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \ + (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7)) + +bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn) +{ + return dmn->info.caps.sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX && + dmn->info.caps.support_modify_argument; +} + +static int dr_domain_init_modify_header_resources(struct mlx5dr_domain *dmn) +{ + if (!mlx5dr_domain_is_support_ptrn_arg(dmn)) + return 0; + + dmn->ptrn_mgr = mlx5dr_ptrn_mgr_create(dmn); + if (!dmn->ptrn_mgr) { + mlx5dr_err(dmn, "Couldn't create ptrn_mgr\n"); + return -ENOMEM; + } + + /* create argument pool */ + dmn->arg_mgr = mlx5dr_arg_mgr_create(dmn); + if (!dmn->arg_mgr) { + mlx5dr_err(dmn, "Couldn't create arg_mgr\n"); + goto free_modify_header_pattern; + } + + return 0; + +free_modify_header_pattern: + mlx5dr_ptrn_mgr_destroy(dmn->ptrn_mgr); + return -ENOMEM; +} + +static void dr_domain_destroy_modify_header_resources(struct mlx5dr_domain *dmn) +{ + if (!mlx5dr_domain_is_support_ptrn_arg(dmn)) + return; + + mlx5dr_arg_mgr_destroy(dmn->arg_mgr); + mlx5dr_ptrn_mgr_destroy(dmn->ptrn_mgr); +} + +static void dr_domain_init_csum_recalc_fts(struct mlx5dr_domain *dmn) +{ + /* Per vport cached FW FT for checksum recalculation, this + * recalculation is needed due to a HW bug in STEv0. + */ + xa_init(&dmn->csum_fts_xa); +} + +static void dr_domain_uninit_csum_recalc_fts(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + unsigned long i; + + xa_for_each(&dmn->csum_fts_xa, i, recalc_cs_ft) { + if (recalc_cs_ft) + mlx5dr_fw_destroy_recalc_cs_ft(dmn, recalc_cs_ft); + } + + xa_destroy(&dmn->csum_fts_xa); +} + +int mlx5dr_domain_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u16 vport_num, + u64 *rx_icm_addr) +{ + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + int ret; + + recalc_cs_ft = xa_load(&dmn->csum_fts_xa, vport_num); + if (!recalc_cs_ft) { + /* Table hasn't been created yet */ + recalc_cs_ft = mlx5dr_fw_create_recalc_cs_ft(dmn, vport_num); + if (!recalc_cs_ft) + return -EINVAL; + + ret = xa_err(xa_store(&dmn->csum_fts_xa, vport_num, + recalc_cs_ft, GFP_KERNEL)); + if (ret) + return ret; + } + + *rx_icm_addr = recalc_cs_ft->rx_icm_addr; + + return 0; +} + +static int dr_domain_init_mem_resources(struct mlx5dr_domain *dmn) +{ + int ret; + + dmn->chunks_kmem_cache = kmem_cache_create("mlx5_dr_chunks", + sizeof(struct mlx5dr_icm_chunk), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!dmn->chunks_kmem_cache) { + mlx5dr_err(dmn, "Couldn't create chunks kmem_cache\n"); + return -ENOMEM; + } + + dmn->htbls_kmem_cache = kmem_cache_create("mlx5_dr_htbls", + sizeof(struct mlx5dr_ste_htbl), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!dmn->htbls_kmem_cache) { + mlx5dr_err(dmn, "Couldn't create hash tables kmem_cache\n"); + ret = -ENOMEM; + goto free_chunks_kmem_cache; + } + + dmn->ste_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_STE); + if (!dmn->ste_icm_pool) { + mlx5dr_err(dmn, "Couldn't get icm memory\n"); + ret = -ENOMEM; + goto free_htbls_kmem_cache; + } + + dmn->action_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_ACTION); + if (!dmn->action_icm_pool) { + mlx5dr_err(dmn, "Couldn't get action icm memory\n"); + ret = -ENOMEM; + goto free_ste_icm_pool; + } + + ret = mlx5dr_send_info_pool_create(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create send info pool\n"); + goto free_action_icm_pool; + } + + return 0; + +free_action_icm_pool: + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); +free_ste_icm_pool: + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); +free_htbls_kmem_cache: + kmem_cache_destroy(dmn->htbls_kmem_cache); +free_chunks_kmem_cache: + kmem_cache_destroy(dmn->chunks_kmem_cache); + + return ret; +} + +static void dr_domain_uninit_mem_resources(struct mlx5dr_domain *dmn) +{ + mlx5dr_send_info_pool_destroy(dmn); + mlx5dr_icm_pool_destroy(dmn->action_icm_pool); + mlx5dr_icm_pool_destroy(dmn->ste_icm_pool); + kmem_cache_destroy(dmn->htbls_kmem_cache); + kmem_cache_destroy(dmn->chunks_kmem_cache); +} + +static int dr_domain_init_resources(struct mlx5dr_domain *dmn) +{ + int ret; + + dmn->ste_ctx = mlx5dr_ste_get_ctx(dmn->info.caps.sw_format_ver); + if (!dmn->ste_ctx) { + mlx5dr_err(dmn, "SW Steering on this device is unsupported\n"); + return -EOPNOTSUPP; + } + + ret = mlx5_core_alloc_pd(dmn->mdev, &dmn->pdn); + if (ret) { + mlx5dr_err(dmn, "Couldn't allocate PD, ret: %d", ret); + return ret; + } + + dmn->uar = mlx5_get_uars_page(dmn->mdev); + if (IS_ERR(dmn->uar)) { + mlx5dr_err(dmn, "Couldn't allocate UAR\n"); + ret = PTR_ERR(dmn->uar); + goto clean_pd; + } + + ret = dr_domain_init_mem_resources(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create domain memory resources\n"); + goto clean_uar; + } + + ret = dr_domain_init_modify_header_resources(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create modify-header-resources\n"); + goto clean_mem_resources; + } + + ret = mlx5dr_send_ring_alloc(dmn); + if (ret) { + mlx5dr_err(dmn, "Couldn't create send-ring\n"); + goto clean_modify_hdr; + } + + return 0; + +clean_modify_hdr: + dr_domain_destroy_modify_header_resources(dmn); +clean_mem_resources: + dr_domain_uninit_mem_resources(dmn); +clean_uar: + mlx5_put_uars_page(dmn->mdev, dmn->uar); +clean_pd: + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); + + return ret; +} + +static void dr_domain_uninit_resources(struct mlx5dr_domain *dmn) +{ + mlx5dr_send_ring_free(dmn, dmn->send_ring); + dr_domain_destroy_modify_header_resources(dmn); + dr_domain_uninit_mem_resources(dmn); + mlx5_put_uars_page(dmn->mdev, dmn->uar); + mlx5_core_dealloc_pd(dmn->mdev, dmn->pdn); +} + +static void dr_domain_fill_uplink_caps(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_vport_cap *uplink_vport) +{ + struct mlx5dr_esw_caps *esw_caps = &dmn->info.caps.esw_caps; + + uplink_vport->num = MLX5_VPORT_UPLINK; + uplink_vport->icm_address_rx = esw_caps->uplink_icm_address_rx; + uplink_vport->icm_address_tx = esw_caps->uplink_icm_address_tx; + uplink_vport->vport_gvmi = 0; + uplink_vport->vhca_gvmi = dmn->info.caps.gvmi; +} + +static int dr_domain_query_vport(struct mlx5dr_domain *dmn, + u16 vport_number, + bool other_vport, + struct mlx5dr_cmd_vport_cap *vport_caps) +{ + int ret; + + ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev, + other_vport, + vport_number, + &vport_caps->icm_address_rx, + &vport_caps->icm_address_tx); + if (ret) + return ret; + + ret = mlx5dr_cmd_query_gvmi(dmn->mdev, + other_vport, + vport_number, + &vport_caps->vport_gvmi); + if (ret) + return ret; + + vport_caps->num = vport_number; + vport_caps->vhca_gvmi = dmn->info.caps.gvmi; + + return 0; +} + +static int dr_domain_query_esw_mgr(struct mlx5dr_domain *dmn) +{ + return dr_domain_query_vport(dmn, 0, false, + &dmn->info.caps.vports.esw_manager_caps); +} + +static void dr_domain_query_uplink(struct mlx5dr_domain *dmn) +{ + dr_domain_fill_uplink_caps(dmn, &dmn->info.caps.vports.uplink_caps); +} + +static struct mlx5dr_cmd_vport_cap * +dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + struct mlx5dr_cmd_vport_cap *vport_caps; + int ret; + + vport_caps = kvzalloc(sizeof(*vport_caps), GFP_KERNEL); + if (!vport_caps) + return NULL; + + ret = dr_domain_query_vport(dmn, vport, true, vport_caps); + if (ret) { + kvfree(vport_caps); + return NULL; + } + + ret = xa_insert(&caps->vports.vports_caps_xa, vport, + vport_caps, GFP_KERNEL); + if (ret) { + mlx5dr_dbg(dmn, "Couldn't insert new vport into xarray (%d)\n", ret); + kvfree(vport_caps); + return ERR_PTR(ret); + } + + return vport_caps; +} + +static bool dr_domain_is_esw_mgr_vport(struct mlx5dr_domain *dmn, u16 vport) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (caps->is_ecpf && vport == MLX5_VPORT_ECPF) || + (!caps->is_ecpf && vport == 0); +} + +struct mlx5dr_cmd_vport_cap * +mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + struct mlx5dr_cmd_vport_cap *vport_caps; + + if (dr_domain_is_esw_mgr_vport(dmn, vport)) + return &caps->vports.esw_manager_caps; + + if (vport == MLX5_VPORT_UPLINK) + return &caps->vports.uplink_caps; + +vport_load: + vport_caps = xa_load(&caps->vports.vports_caps_xa, vport); + if (vport_caps) + return vport_caps; + + vport_caps = dr_domain_add_vport_cap(dmn, vport); + if (PTR_ERR(vport_caps) == -EBUSY) + /* caps were already stored by another thread */ + goto vport_load; + + return vport_caps; +} + +static void dr_domain_clear_vports(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + unsigned long i; + + xa_for_each(&dmn->info.caps.vports.vports_caps_xa, i, vport_caps) { + vport_caps = xa_erase(&dmn->info.caps.vports.vports_caps_xa, i); + kvfree(vport_caps); + } +} + +static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + int ret; + + if (!dmn->info.caps.eswitch_manager) + return -EOPNOTSUPP; + + ret = mlx5dr_cmd_query_esw_caps(mdev, &dmn->info.caps.esw_caps); + if (ret) + return ret; + + dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner; + dmn->info.caps.fdb_sw_owner_v2 = dmn->info.caps.esw_caps.sw_owner_v2; + dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx; + dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx; + + xa_init(&dmn->info.caps.vports.vports_caps_xa); + + /* Query eswitch manager and uplink vports only. Rest of the + * vports (vport 0, VFs and SFs) will be queried dynamically. + */ + + ret = dr_domain_query_esw_mgr(dmn); + if (ret) { + mlx5dr_err(dmn, "Failed to query eswitch manager vport caps (err: %d)", ret); + goto free_vports_caps_xa; + } + + dr_domain_query_uplink(dmn); + + return 0; + +free_vports_caps_xa: + xa_destroy(&dmn->info.caps.vports.vports_caps_xa); + + return ret; +} + +static int dr_domain_caps_init(struct mlx5_core_dev *mdev, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_vport_cap *vport_cap; + int ret; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) { + mlx5dr_err(dmn, "Failed to allocate domain, bad link type\n"); + return -EOPNOTSUPP; + } + + ret = mlx5dr_cmd_query_device(mdev, &dmn->info.caps); + if (ret) + return ret; + + ret = dr_domain_query_fdb_caps(mdev, dmn); + if (ret) + return ret; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, rx)) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX; + dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address; + dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, tx)) + return -ENOTSUPP; + + dmn->info.supp_sw_steering = true; + dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX; + dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address; + break; + case MLX5DR_DOMAIN_TYPE_FDB: + if (!dmn->info.caps.eswitch_manager) + return -ENOTSUPP; + + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, fdb)) + return -ENOTSUPP; + + dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX; + dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX; + vport_cap = &dmn->info.caps.vports.esw_manager_caps; + + dmn->info.supp_sw_steering = true; + dmn->info.tx.default_icm_addr = vport_cap->icm_address_tx; + dmn->info.rx.default_icm_addr = vport_cap->icm_address_rx; + dmn->info.rx.drop_icm_addr = dmn->info.caps.esw_rx_drop_address; + dmn->info.tx.drop_icm_addr = dmn->info.caps.esw_tx_drop_address; + break; + default: + mlx5dr_err(dmn, "Invalid domain\n"); + ret = -EINVAL; + break; + } + + return ret; +} + +static void dr_domain_caps_uninit(struct mlx5dr_domain *dmn) +{ + dr_domain_clear_vports(dmn); + xa_destroy(&dmn->info.caps.vports.vports_caps_xa); +} + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) +{ + struct mlx5dr_domain *dmn; + int ret; + + if (type > MLX5DR_DOMAIN_TYPE_FDB) + return NULL; + + dmn = kzalloc(sizeof(*dmn), GFP_KERNEL); + if (!dmn) + return NULL; + + dmn->mdev = mdev; + dmn->type = type; + refcount_set(&dmn->refcount, 1); + mutex_init(&dmn->info.rx.mutex); + mutex_init(&dmn->info.tx.mutex); + xa_init(&dmn->definers_xa); + xa_init(&dmn->peer_dmn_xa); + + if (dr_domain_caps_init(mdev, dmn)) { + mlx5dr_err(dmn, "Failed init domain, no caps\n"); + goto def_xa_destroy; + } + + dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K; + dmn->info.max_log_sw_icm_sz = min_t(u32, DR_CHUNK_SIZE_1024K, + dmn->info.caps.log_icm_size); + dmn->info.max_log_modify_hdr_pattern_icm_sz = + min_t(u32, DR_CHUNK_SIZE_4K, + dmn->info.caps.log_modify_pattern_icm_size); + + if (!dmn->info.supp_sw_steering) { + mlx5dr_err(dmn, "SW steering is not supported\n"); + goto uninit_caps; + } + + /* Allocate resources */ + ret = dr_domain_init_resources(dmn); + if (ret) { + mlx5dr_err(dmn, "Failed init domain resources\n"); + goto uninit_caps; + } + + dr_domain_init_csum_recalc_fts(dmn); + mlx5dr_dbg_init_dump(dmn); + return dmn; + +uninit_caps: + dr_domain_caps_uninit(dmn); +def_xa_destroy: + xa_destroy(&dmn->peer_dmn_xa); + xa_destroy(&dmn->definers_xa); + kfree(dmn); + return NULL; +} + +/* Assure synchronization of the device steering tables with updates made by SW + * insertion. + */ +int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) +{ + int ret = 0; + + if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { + mlx5dr_domain_lock(dmn); + ret = mlx5dr_send_ring_force_drain(dmn); + mlx5dr_domain_unlock(dmn); + if (ret) { + mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", + flags, ret); + return ret; + } + } + + if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) + ret = mlx5dr_cmd_sync_steering(dmn->mdev); + + return ret; +} + +int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) +{ + if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1)) + return -EBUSY; + + /* make sure resources are not used by the hardware */ + mlx5dr_cmd_sync_steering(dmn->mdev); + mlx5dr_dbg_uninit_dump(dmn); + dr_domain_uninit_csum_recalc_fts(dmn); + dr_domain_uninit_resources(dmn); + dr_domain_caps_uninit(dmn); + xa_destroy(&dmn->peer_dmn_xa); + xa_destroy(&dmn->definers_xa); + mutex_destroy(&dmn->info.tx.mutex); + mutex_destroy(&dmn->info.rx.mutex); + kfree(dmn); + return 0; +} + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn, + u16 peer_vhca_id) +{ + struct mlx5dr_domain *peer; + + mlx5dr_domain_lock(dmn); + + peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id); + if (peer) + refcount_dec(&peer->refcount); + + WARN_ON(xa_err(xa_store(&dmn->peer_dmn_xa, peer_vhca_id, peer_dmn, GFP_KERNEL))); + + peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id); + if (peer) + refcount_inc(&peer->refcount); + + mlx5dr_domain_unlock(dmn); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c new file mode 100644 index 0000000000..f05ef0cd54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include "dr_types.h" + +struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u16 vport_num) +{ + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; + u32 table_id, group_id, modify_hdr_id; + u64 rx_icm_addr, modify_ttl_action; + int ret; + + recalc_cs_ft = kzalloc(sizeof(*recalc_cs_ft), GFP_KERNEL); + if (!recalc_cs_ft) + return NULL; + + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = dmn->info.caps.max_ft_level - 1; + ft_attr.term_tbl = true; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, + &ft_attr, + &rx_icm_addr, + &table_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret); + goto free_ttl_tbl; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, &group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating TTL W/A FW flow group %d\n", ret); + goto destroy_flow_table; + } + + /* Modify TTL action by adding zero to trigger CS recalculation */ + modify_ttl_action = 0; + MLX5_SET(set_action_in, &modify_ttl_action, action_type, MLX5_ACTION_TYPE_ADD); + MLX5_SET(set_action_in, &modify_ttl_action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL); + + ret = mlx5dr_cmd_alloc_modify_header(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, 1, + &modify_ttl_action, + &modify_hdr_id); + if (ret) { + mlx5dr_err(dmn, "Failed modify header TTL %d\n", ret); + goto destroy_flow_group; + } + + ret = mlx5dr_cmd_set_fte_modify_and_vport(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id, modify_hdr_id, + vport_num); + if (ret) { + mlx5dr_err(dmn, "Failed setting TTL W/A flow table entry %d\n", ret); + goto dealloc_modify_header; + } + + recalc_cs_ft->modify_hdr_id = modify_hdr_id; + recalc_cs_ft->rx_icm_addr = rx_icm_addr; + recalc_cs_ft->table_id = table_id; + recalc_cs_ft->group_id = group_id; + + return recalc_cs_ft; + +dealloc_modify_header: + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, modify_hdr_id); +destroy_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + table_id, group_id); +destroy_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, table_id, MLX5_FLOW_TABLE_TYPE_FDB); +free_ttl_tbl: + kfree(recalc_cs_ft); + return NULL; +} + +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id); + mlx5dr_cmd_dealloc_modify_header(dmn->mdev, recalc_cs_ft->modify_hdr_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + recalc_cs_ft->table_id, + recalc_cs_ft->group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, + recalc_cs_ft->table_id, + MLX5_FLOW_TABLE_TYPE_FDB); + + kfree(recalc_cs_ft); +} + +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id, + bool ignore_flow_level, + u32 flow_source) +{ + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + struct mlx5dr_cmd_fte_info fte_info = {}; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM] = {}; + struct mlx5dr_cmd_ft_info ft_info = {}; + int ret; + + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = min_t(int, dmn->info.caps.max_ft_level - 2, + MLX5_FT_MAX_MULTIPATH_LEVEL); + ft_attr.reformat_en = reformat_req; + ft_attr.decap_en = reformat_req; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, &ft_attr, NULL, tbl_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow table %d\n", ret); + return ret; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow group %d\n", ret); + goto free_flow_table; + } + + ft_info.id = *tbl_id; + ft_info.type = FS_FT_FDB; + fte_info.action.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + fte_info.dests_size = num_dest; + fte_info.val = val; + fte_info.dest_arr = dest; + fte_info.ignore_flow_level = ignore_flow_level; + fte_info.flow_context.flow_source = flow_source; + + ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info); + if (ret) { + mlx5dr_err(dmn, "Failed setting fte into table %d\n", ret); + goto free_flow_group; + } + + return 0; + +free_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, *group_id); +free_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, *tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); + return ret; +} + +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, + u32 tbl_id, u32 group_id) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, FS_FT_FDB, tbl_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + tbl_id, group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c new file mode 100644 index 0000000000..0b5af9f3f6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +#define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 +#define DR_ICM_POOL_STE_HOT_MEM_PERCENT 25 +#define DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT 50 +#define DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT 90 + +struct mlx5dr_icm_hot_chunk { + struct mlx5dr_icm_buddy_mem *buddy_mem; + unsigned int seg; + enum mlx5dr_icm_chunk_size size; +}; + +struct mlx5dr_icm_pool { + enum mlx5dr_icm_type icm_type; + enum mlx5dr_icm_chunk_size max_log_chunk_sz; + struct mlx5dr_domain *dmn; + struct kmem_cache *chunks_kmem_cache; + + /* memory management */ + struct mutex mutex; /* protect the ICM pool and ICM buddy */ + struct list_head buddy_mem_list; + + /* Hardware may be accessing this memory but at some future, + * undetermined time, it might cease to do so. + * sync_ste command sets them free. + */ + struct mlx5dr_icm_hot_chunk *hot_chunks_arr; + u32 hot_chunks_num; + u64 hot_memory_size; + /* hot memory size threshold for triggering sync */ + u64 th; +}; + +struct mlx5dr_icm_dm { + u32 obj_id; + enum mlx5_sw_icm_type type; + phys_addr_t addr; + size_t length; +}; + +struct mlx5dr_icm_mr { + u32 mkey; + struct mlx5dr_icm_dm dm; + struct mlx5dr_domain *dmn; + size_t length; + u64 icm_start_addr; +}; + +static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, + u32 pd, u64 length, u64 start_addr, int mode, + u32 *mkey) +{ + u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, access_mode_1_0, mode); + MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + if (mode == MLX5_MKC_ACCESS_MODE_SW_ICM) { + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + } + + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, pd); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); + + return mlx5_core_create_mkey(mdev, mkey, in, inlen); +} + +u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk) +{ + u32 offset = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type); + + return (u64)offset * chunk->seg; +} + +u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk) +{ + return chunk->buddy_mem->icm_mr->mkey; +} + +u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk) +{ + u32 size = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type); + + return (u64)chunk->buddy_mem->icm_mr->icm_start_addr + size * chunk->seg; +} + +u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk) +{ + return mlx5dr_icm_pool_chunk_size_to_byte(chunk->size, + chunk->buddy_mem->pool->icm_type); +} + +u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk) +{ + return mlx5dr_icm_pool_chunk_size_to_entries(chunk->size); +} + +static struct mlx5dr_icm_mr * +dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) +{ + struct mlx5_core_dev *mdev = pool->dmn->mdev; + enum mlx5_sw_icm_type dm_type = 0; + struct mlx5dr_icm_mr *icm_mr; + size_t log_align_base = 0; + int err; + + icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL); + if (!icm_mr) + return NULL; + + icm_mr->dmn = pool->dmn; + + icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type); + + switch (pool->icm_type) { + case DR_ICM_TYPE_STE: + dm_type = MLX5_SW_ICM_TYPE_STEERING; + log_align_base = ilog2(icm_mr->dm.length); + break; + case DR_ICM_TYPE_MODIFY_ACTION: + dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY; + /* Align base is 64B */ + log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE); + break; + case DR_ICM_TYPE_MODIFY_HDR_PTRN: + dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN; + /* Align base is 64B */ + log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE); + break; + default: + WARN_ON(pool->icm_type); + } + + icm_mr->dm.type = dm_type; + + err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, + log_align_base, 0, &icm_mr->dm.addr, + &icm_mr->dm.obj_id); + if (err) { + mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err); + goto free_icm_mr; + } + + /* Register device memory */ + err = dr_icm_create_dm_mkey(mdev, pool->dmn->pdn, + icm_mr->dm.length, + icm_mr->dm.addr, + MLX5_MKC_ACCESS_MODE_SW_ICM, + &icm_mr->mkey); + if (err) { + mlx5dr_err(pool->dmn, "Failed to create SW ICM MKEY, err (%d)\n", err); + goto free_dm; + } + + icm_mr->icm_start_addr = icm_mr->dm.addr; + + if (icm_mr->icm_start_addr & (BIT(log_align_base) - 1)) { + mlx5dr_err(pool->dmn, "Failed to get Aligned ICM mem (asked: %zu)\n", + log_align_base); + goto free_mkey; + } + + return icm_mr; + +free_mkey: + mlx5_core_destroy_mkey(mdev, icm_mr->mkey); +free_dm: + mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0, + icm_mr->dm.addr, icm_mr->dm.obj_id); +free_icm_mr: + kvfree(icm_mr); + return NULL; +} + +static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) +{ + struct mlx5_core_dev *mdev = icm_mr->dmn->mdev; + struct mlx5dr_icm_dm *dm = &icm_mr->dm; + + mlx5_core_destroy_mkey(mdev, icm_mr->mkey); + mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0, + dm->addr, dm->obj_id); + kvfree(icm_mr); +} + +static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy) +{ + /* We support only one type of STE size, both for ConnectX-5 and later + * devices. Once the support for match STE which has a larger tag is + * added (32B instead of 16B), the STE size for devices later than + * ConnectX-5 needs to account for that. + */ + return DR_STE_SIZE_REDUCED; +} + +static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset) +{ + int num_of_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + int ste_size = dr_icm_buddy_get_ste_size(buddy); + int index = offset / DR_STE_SIZE; + + chunk->ste_arr = &buddy->ste_arr[index]; + chunk->miss_list = &buddy->miss_list[index]; + chunk->hw_ste_arr = buddy->hw_ste_arr + index * ste_size; + + memset(chunk->hw_ste_arr, 0, num_of_entries * ste_size); + memset(chunk->ste_arr, 0, + num_of_entries * sizeof(chunk->ste_arr[0])); +} + +static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + int num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz); + + buddy->ste_arr = kvcalloc(num_of_entries, + sizeof(struct mlx5dr_ste), GFP_KERNEL); + if (!buddy->ste_arr) + return -ENOMEM; + + /* Preallocate full STE size on non-ConnectX-5 devices since + * we need to support both full and reduced with the same cache. + */ + buddy->hw_ste_arr = kvcalloc(num_of_entries, + dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL); + if (!buddy->hw_ste_arr) + goto free_ste_arr; + + buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL); + if (!buddy->miss_list) + goto free_hw_ste_arr; + + return 0; + +free_hw_ste_arr: + kvfree(buddy->hw_ste_arr); +free_ste_arr: + kvfree(buddy->ste_arr); + return -ENOMEM; +} + +static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy) +{ + kvfree(buddy->ste_arr); + kvfree(buddy->hw_ste_arr); + kvfree(buddy->miss_list); +} + +static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_buddy_mem *buddy; + struct mlx5dr_icm_mr *icm_mr; + + icm_mr = dr_icm_pool_mr_create(pool); + if (!icm_mr) + return -ENOMEM; + + buddy = kvzalloc(sizeof(*buddy), GFP_KERNEL); + if (!buddy) + goto free_mr; + + if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz)) + goto err_free_buddy; + + buddy->icm_mr = icm_mr; + buddy->pool = pool; + + if (pool->icm_type == DR_ICM_TYPE_STE) { + /* Reduce allocations by preallocating and reusing the STE structures */ + if (dr_icm_buddy_init_ste_cache(buddy)) + goto err_cleanup_buddy; + } + + /* add it to the -start- of the list in order to search in it first */ + list_add(&buddy->list_node, &pool->buddy_mem_list); + + pool->dmn->num_buddies[pool->icm_type]++; + + return 0; + +err_cleanup_buddy: + mlx5dr_buddy_cleanup(buddy); +err_free_buddy: + kvfree(buddy); +free_mr: + dr_icm_pool_mr_destroy(icm_mr); + return -ENOMEM; +} + +static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) +{ + enum mlx5dr_icm_type icm_type = buddy->pool->icm_type; + + dr_icm_pool_mr_destroy(buddy->icm_mr); + + mlx5dr_buddy_cleanup(buddy); + + if (icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_cleanup_ste_cache(buddy); + + buddy->pool->dmn->num_buddies[icm_type]--; + + kvfree(buddy); +} + +static void +dr_icm_chunk_init(struct mlx5dr_icm_chunk *chunk, + struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + struct mlx5dr_icm_buddy_mem *buddy_mem_pool, + unsigned int seg) +{ + int offset; + + chunk->seg = seg; + chunk->size = chunk_size; + chunk->buddy_mem = buddy_mem_pool; + + if (pool->icm_type == DR_ICM_TYPE_STE) { + offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg; + dr_icm_chunk_ste_init(chunk, offset); + } + + buddy_mem_pool->used_memory += mlx5dr_icm_pool_get_chunk_byte_size(chunk); +} + +static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) +{ + return pool->hot_memory_size > pool->th; +} + +static void dr_icm_pool_clear_hot_chunks_arr(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_hot_chunk *hot_chunk; + u32 i, num_entries; + + for (i = 0; i < pool->hot_chunks_num; i++) { + hot_chunk = &pool->hot_chunks_arr[i]; + num_entries = mlx5dr_icm_pool_chunk_size_to_entries(hot_chunk->size); + mlx5dr_buddy_free_mem(hot_chunk->buddy_mem, + hot_chunk->seg, ilog2(num_entries)); + hot_chunk->buddy_mem->used_memory -= + mlx5dr_icm_pool_chunk_size_to_byte(hot_chunk->size, + pool->icm_type); + } + + pool->hot_chunks_num = 0; + pool->hot_memory_size = 0; +} + +static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; + int err; + + err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); + if (err) { + mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err); + return err; + } + + dr_icm_pool_clear_hot_chunks_arr(pool); + + list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) { + if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_destroy(buddy); + } + + return 0; +} + +static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + struct mlx5dr_icm_buddy_mem **buddy, + unsigned int *seg) +{ + struct mlx5dr_icm_buddy_mem *buddy_mem_pool; + bool new_mem = false; + int err; + +alloc_buddy_mem: + /* find the next free place from the buddy list */ + list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) { + err = mlx5dr_buddy_alloc_mem(buddy_mem_pool, + chunk_size, seg); + if (!err) + goto found; + + if (WARN_ON(new_mem)) { + /* We have new memory pool, first in the list */ + mlx5dr_err(pool->dmn, + "No memory for order: %d\n", + chunk_size); + goto out; + } + } + + /* no more available allocators in that pool, create new */ + err = dr_icm_buddy_create(pool); + if (err) { + mlx5dr_err(pool->dmn, + "Failed creating buddy for order %d\n", + chunk_size); + goto out; + } + + /* mark we have new memory, first in list */ + new_mem = true; + goto alloc_buddy_mem; + +found: + *buddy = buddy_mem_pool; +out: + return err; +} + +/* Allocate an ICM chunk, each chunk holds a piece of ICM memory and + * also memory used for HW STE management for optimizations. + */ +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size) +{ + struct mlx5dr_icm_chunk *chunk = NULL; + struct mlx5dr_icm_buddy_mem *buddy; + unsigned int seg; + int ret; + + if (chunk_size > pool->max_log_chunk_sz) + return NULL; + + mutex_lock(&pool->mutex); + /* find mem, get back the relevant buddy pool and seg in that mem */ + ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg); + if (ret) + goto out; + + chunk = kmem_cache_alloc(pool->chunks_kmem_cache, GFP_KERNEL); + if (!chunk) + goto out_err; + + dr_icm_chunk_init(chunk, pool, chunk_size, buddy, seg); + + goto out; + +out_err: + mlx5dr_buddy_free_mem(buddy, seg, chunk_size); +out: + mutex_unlock(&pool->mutex); + return chunk; +} + +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) +{ + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + struct mlx5dr_icm_pool *pool = buddy->pool; + struct mlx5dr_icm_hot_chunk *hot_chunk; + struct kmem_cache *chunks_cache; + + chunks_cache = pool->chunks_kmem_cache; + + /* move the chunk to the waiting chunks array, AKA "hot" memory */ + mutex_lock(&pool->mutex); + + pool->hot_memory_size += mlx5dr_icm_pool_get_chunk_byte_size(chunk); + + hot_chunk = &pool->hot_chunks_arr[pool->hot_chunks_num++]; + hot_chunk->buddy_mem = chunk->buddy_mem; + hot_chunk->seg = chunk->seg; + hot_chunk->size = chunk->size; + + kmem_cache_free(chunks_cache, chunk); + + /* Check if we have chunks that are waiting for sync-ste */ + if (dr_icm_pool_is_sync_required(pool)) + dr_icm_pool_sync_all_buddy_pools(pool); + + mutex_unlock(&pool->mutex); +} + +struct mlx5dr_ste_htbl *mlx5dr_icm_pool_alloc_htbl(struct mlx5dr_icm_pool *pool) +{ + return kmem_cache_alloc(pool->dmn->htbls_kmem_cache, GFP_KERNEL); +} + +void mlx5dr_icm_pool_free_htbl(struct mlx5dr_icm_pool *pool, struct mlx5dr_ste_htbl *htbl) +{ + kmem_cache_free(pool->dmn->htbls_kmem_cache, htbl); +} + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type) +{ + u32 num_of_chunks, entry_size; + struct mlx5dr_icm_pool *pool; + u32 max_hot_size = 0; + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->dmn = dmn; + pool->icm_type = icm_type; + pool->chunks_kmem_cache = dmn->chunks_kmem_cache; + + INIT_LIST_HEAD(&pool->buddy_mem_list); + mutex_init(&pool->mutex); + + switch (icm_type) { + case DR_ICM_TYPE_STE: + pool->max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_STE_HOT_MEM_PERCENT / 100; + break; + case DR_ICM_TYPE_MODIFY_ACTION: + pool->max_log_chunk_sz = dmn->info.max_log_action_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_MODIFY_ACTION_HOT_MEM_PERCENT / 100; + break; + case DR_ICM_TYPE_MODIFY_HDR_PTRN: + pool->max_log_chunk_sz = dmn->info.max_log_modify_hdr_pattern_icm_sz; + max_hot_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, + pool->icm_type) * + DR_ICM_POOL_MODIFY_HDR_PTRN_HOT_MEM_PERCENT / 100; + break; + default: + WARN_ON(icm_type); + } + + entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type); + + num_of_chunks = DIV_ROUND_UP(max_hot_size, entry_size) + 1; + pool->th = max_hot_size; + + pool->hot_chunks_arr = kvcalloc(num_of_chunks, + sizeof(struct mlx5dr_icm_hot_chunk), + GFP_KERNEL); + if (!pool->hot_chunks_arr) + goto free_pool; + + return pool; + +free_pool: + kvfree(pool); + return NULL; +} + +void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool) +{ + struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; + + dr_icm_pool_clear_hot_chunks_arr(pool); + + list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) + dr_icm_buddy_destroy(buddy); + + kvfree(pool->hot_chunks_arr); + mutex_destroy(&pool->mutex); + kvfree(pool); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c new file mode 100644 index 0000000000..0726848eb3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -0,0 +1,1108 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +static bool dr_mask_is_smac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->smac_47_16 || spec->smac_15_0); +} + +static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dmac_47_16 || spec->dmac_15_0); +} + +static bool dr_mask_is_l3_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->ip_protocol || spec->frag || spec->tcp_flags || + spec->ip_ecn || spec->ip_dscp); +} + +static bool dr_mask_is_tcp_udp_base_set(struct mlx5dr_match_spec *spec) +{ + return (spec->tcp_sport || spec->tcp_dport || + spec->udp_sport || spec->udp_dport); +} + +static bool dr_mask_is_ipv4_set(struct mlx5dr_match_spec *spec) +{ + return (spec->dst_ip_31_0 || spec->src_ip_31_0); +} + +static bool dr_mask_is_ipv4_5_tuple_set(struct mlx5dr_match_spec *spec) +{ + return (dr_mask_is_l3_base_set(spec) || + dr_mask_is_tcp_udp_base_set(spec) || + dr_mask_is_ipv4_set(spec)); +} + +static bool dr_mask_is_eth_l2_tnl_set(struct mlx5dr_match_misc *misc) +{ + return misc->vxlan_vni; +} + +static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ttl_hoplimit; +} + +static bool dr_mask_is_ipv4_ihl_set(struct mlx5dr_match_spec *spec) +{ + return spec->ipv4_ihl; +} + +#define DR_MASK_IS_L2_DST(_spec, _misc, _inner_outer) (_spec.first_vid || \ + (_spec).first_cfi || (_spec).first_prio || (_spec).cvlan_tag || \ + (_spec).svlan_tag || (_spec).dmac_47_16 || (_spec).dmac_15_0 || \ + (_spec).ethertype || (_spec).ip_version || \ + (_misc)._inner_outer##_second_vid || \ + (_misc)._inner_outer##_second_cfi || \ + (_misc)._inner_outer##_second_prio || \ + (_misc)._inner_outer##_second_cvlan_tag || \ + (_misc)._inner_outer##_second_svlan_tag) + +#define DR_MASK_IS_ETH_L4_SET(_spec, _misc, _inner_outer) ( \ + dr_mask_is_l3_base_set(&(_spec)) || \ + dr_mask_is_tcp_udp_base_set(&(_spec)) || \ + dr_mask_is_ttl_set(&(_spec)) || \ + (_misc)._inner_outer##_ipv6_flow_label) + +#define DR_MASK_IS_ETH_L4_MISC_SET(_misc3, _inner_outer) ( \ + (_misc3)._inner_outer##_tcp_seq_num || \ + (_misc3)._inner_outer##_tcp_ack_num) + +#define DR_MASK_IS_FIRST_MPLS_SET(_misc2, _inner_outer) ( \ + (_misc2)._inner_outer##_first_mpls_label || \ + (_misc2)._inner_outer##_first_mpls_exp || \ + (_misc2)._inner_outer##_first_mpls_s_bos || \ + (_misc2)._inner_outer##_first_mpls_ttl) + +static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc) +{ + return (misc->gre_key_h || misc->gre_key_l || + misc->gre_protocol || misc->gre_c_present || + misc->gre_k_present || misc->gre_s_present); +} + +#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) + +#define DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + (_misc)->outer_first_mpls_over_udp_label || \ + (_misc)->outer_first_mpls_over_udp_exp || \ + (_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) + +static bool +dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->outer_vxlan_gpe_vni || + misc3->outer_vxlan_gpe_next_protocol || + misc3->outer_vxlan_gpe_flags); +} + +static bool +dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps) +{ + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED); +} + +static bool +dr_mask_is_tnl_vxlan_gpe(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_vxlan_gpe_set(&mask->misc3) && + dr_matcher_supp_vxlan_gpe(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc) +{ + return misc->geneve_vni || + misc->geneve_oam || + misc->geneve_protocol_type || + misc->geneve_opt_len; +} + +static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3) +{ + return misc3->geneve_tlv_option_0_data; +} + +static bool +dr_matcher_supp_flex_parser_ok(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_parser_ok_bits_supp; +} + +static bool dr_mask_is_tnl_geneve_tlv_opt_exist_set(struct mlx5dr_match_misc *misc, + struct mlx5dr_domain *dmn) +{ + return dr_matcher_supp_flex_parser_ok(&dmn->info.caps) && + misc->geneve_tlv_option_0_exist; +} + +static bool +dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) +{ + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED); +} + +static bool +dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_geneve_set(&mask->misc) && + dr_matcher_supp_tnl_geneve(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_gtpu_set(struct mlx5dr_match_misc3 *misc3) +{ + return misc3->gtpu_msg_flags || misc3->gtpu_msg_type || misc3->gtpu_teid; +} + +static bool dr_matcher_supp_tnl_gtpu(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_gtpu_set(&mask->misc3) && + dr_matcher_supp_tnl_gtpu(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_dw_0(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_dw_0(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_dw_0 && + dr_matcher_supp_tnl_gtpu_dw_0(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_teid(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_teid(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_teid && + dr_matcher_supp_tnl_gtpu_teid(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_dw_2(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_dw_2(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_dw_2 && + dr_matcher_supp_tnl_gtpu_dw_2(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_first_ext(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_first_ext(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_first_ext_dw_0 && + dr_matcher_supp_tnl_gtpu_first_ext(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_gtpu_flex_parser_0(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_0) && + dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_teid) && + dr_mask_is_tnl_gtpu_teid(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_2) && + dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_first_ext_dw_0) && + dr_mask_is_tnl_gtpu_first_ext(mask, dmn)); +} + +static bool dr_mask_is_tnl_gtpu_flex_parser_1(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_0) && + dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_teid) && + dr_mask_is_tnl_gtpu_teid(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_2) && + dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_first_ext_dw_0) && + dr_mask_is_tnl_gtpu_first_ext(mask, dmn)); +} + +static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_gtpu_flex_parser_0(mask, dmn) || + dr_mask_is_tnl_gtpu_flex_parser_1(mask, dmn) || + dr_mask_is_tnl_gtpu(mask, dmn); +} + +static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) +{ + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED); +} + +static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps) +{ + return (caps->sw_format_ver >= MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED); +} + +static bool dr_mask_is_icmpv6_set(struct mlx5dr_match_misc3 *misc3) +{ + return (misc3->icmpv6_type || misc3->icmpv6_code || + misc3->icmpv6_header_data); +} + +static bool dr_mask_is_icmp(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + if (DR_MASK_IS_ICMPV4_SET(&mask->misc3)) + return dr_matcher_supp_icmp_v4(&dmn->info.caps); + else if (dr_mask_is_icmpv6_set(&mask->misc3)) + return dr_matcher_supp_icmp_v6(&dmn->info.caps); + + return false; +} + +static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2) +{ + return misc2->metadata_reg_a; +} + +static bool dr_mask_is_reg_c_0_3_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_0 || misc2->metadata_reg_c_1 || + misc2->metadata_reg_c_2 || misc2->metadata_reg_c_3); +} + +static bool dr_mask_is_reg_c_4_7_set(struct mlx5dr_match_misc2 *misc2) +{ + return (misc2->metadata_reg_c_4 || misc2->metadata_reg_c_5 || + misc2->metadata_reg_c_6 || misc2->metadata_reg_c_7); +} + +static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc) +{ + return (misc->source_sqn || misc->source_port); +} + +static bool dr_mask_is_flex_parser_id_0_3_set(u32 flex_parser_id, + u32 flex_parser_value) +{ + if (flex_parser_id) + return flex_parser_id <= DR_STE_MAX_FLEX_0_ID; + + /* Using flex_parser 0 means that id is zero, thus value must be set. */ + return flex_parser_value; +} + +static bool dr_mask_is_flex_parser_0_3_set(struct mlx5dr_match_misc4 *misc4) +{ + return (dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_0, + misc4->prog_sample_field_value_0) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_1, + misc4->prog_sample_field_value_1) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_2, + misc4->prog_sample_field_value_2) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_3, + misc4->prog_sample_field_value_3)); +} + +static bool dr_mask_is_flex_parser_id_4_7_set(u32 flex_parser_id) +{ + return flex_parser_id > DR_STE_MAX_FLEX_0_ID && + flex_parser_id <= DR_STE_MAX_FLEX_1_ID; +} + +static bool dr_mask_is_flex_parser_4_7_set(struct mlx5dr_match_misc4 *misc4) +{ + return (dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_0) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_1) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_2) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_3)); +} + +static int dr_matcher_supp_tnl_mpls_over_gre(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED; +} + +static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(&mask->misc2) && + dr_matcher_supp_tnl_mpls_over_gre(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED; +} + +static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) && + dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_header_0_1_set(struct mlx5dr_match_misc5 *misc5) +{ + return misc5->tunnel_header_0 || misc5->tunnel_header_1; +} + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv inner_ipv) +{ + nic_matcher->ste_builder = + nic_matcher->ste_builder_arr[outer_ipv][inner_ipv]; + nic_matcher->num_of_builders = + nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv]; + + if (!nic_matcher->num_of_builders) { + mlx5dr_dbg(matcher->tbl->dmn, + "Rule not supported on this matcher due to IP related fields\n"); + return -EINVAL; + } + + return 0; +} + +static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv inner_ipv) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_match_param mask = {}; + bool allow_empty_match = false; + struct mlx5dr_ste_build *sb; + bool inner, rx; + int idx = 0; + int ret, i; + + sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv]; + rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX; + + /* Create a temporary mask to track and clear used mask fields */ + if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER) + mask.outer = matcher->mask.outer; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC) + mask.misc = matcher->mask.misc; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_INNER) + mask.inner = matcher->mask.inner; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC2) + mask.misc2 = matcher->mask.misc2; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3) + mask.misc3 = matcher->mask.misc3; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) + mask.misc4 = matcher->mask.misc4; + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC5) + mask.misc5 = matcher->mask.misc5; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, NULL); + if (ret) + return ret; + + /* Optimize RX pipe by reducing source port match, since + * the FDB RX part is connected only to the wire. + */ + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB && + rx && mask.misc.source_port) { + mask.misc.source_port = 0; + mask.misc.source_eswitch_owner_vhca_id = 0; + allow_empty_match = true; + } + + /* Outer */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3 | + DR_MATCHER_CRITERIA_MISC5)) { + inner = false; + + if (dr_mask_is_wqe_metadata_set(&mask.misc2)) + mlx5dr_ste_build_general_purpose(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_reg_c_0_3_set(&mask.misc2)) + mlx5dr_ste_build_register_0(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_reg_c_4_7_set(&mask.misc2)) + mlx5dr_ste_build_register_1(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_gvmi_or_qpn_set(&mask.misc) && + (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX)) { + mlx5dr_ste_build_src_gvmi_qpn(ste_ctx, &sb[idx++], + &mask, dmn, inner, rx); + } + + if (dr_mask_is_smac_set(&mask.outer) && + dr_mask_is_dmac_set(&mask.outer)) { + mlx5dr_ste_build_eth_l2_src_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (dr_mask_is_smac_set(&mask.outer)) + mlx5dr_ste_build_eth_l2_src(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_eth_l2_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (outer_ipv == DR_RULE_IPV6) { + if (DR_MASK_IS_DST_IP_SET(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_SRC_IP_SET(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer)) + mlx5dr_ste_build_eth_ipv6_l3_l4(ste_ctx, &sb[idx++], + &mask, inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_ttl_set(&mask.outer) || + dr_mask_is_ipv4_ihl_set(&mask.outer)) + mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn)) + mlx5dr_ste_build_tnl_vxlan_gpe(ste_ctx, &sb[idx++], + &mask, inner, rx); + else if (dr_mask_is_tnl_geneve(&mask, dmn)) { + mlx5dr_ste_build_tnl_geneve(ste_ctx, &sb[idx++], + &mask, inner, rx); + if (dr_mask_is_tnl_geneve_tlv_opt(&mask.misc3)) + mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + if (dr_mask_is_tnl_geneve_tlv_opt_exist_set(&mask.misc, dmn)) + mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + } else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) { + if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gtpu_flex_parser_1(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu_flex_parser_1(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gtpu(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++], + &mask, inner, rx); + } else if (dr_mask_is_tnl_header_0_1_set(&mask.misc5)) { + mlx5dr_ste_build_tnl_header_0_1(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) + mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer)) + mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_icmp(&mask, dmn)) + mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gre_set(&mask.misc)) + mlx5dr_ste_build_tnl_gre(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + /* Inner */ + if (matcher->match_criteria & (DR_MATCHER_CRITERIA_INNER | + DR_MATCHER_CRITERIA_MISC | + DR_MATCHER_CRITERIA_MISC2 | + DR_MATCHER_CRITERIA_MISC3)) { + inner = true; + + if (dr_mask_is_eth_l2_tnl_set(&mask.misc)) + mlx5dr_ste_build_eth_l2_tnl(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_smac_set(&mask.inner) && + dr_mask_is_dmac_set(&mask.inner)) { + mlx5dr_ste_build_eth_l2_src_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (dr_mask_is_smac_set(&mask.inner)) + mlx5dr_ste_build_eth_l2_src(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_L2_DST(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_eth_l2_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (inner_ipv == DR_RULE_IPV6) { + if (DR_MASK_IS_DST_IP_SET(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_dst(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_SRC_IP_SET(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner)) + mlx5dr_ste_build_eth_ipv6_l3_l4(ste_ctx, &sb[idx++], + &mask, inner, rx); + } else { + if (dr_mask_is_ipv4_5_tuple_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_5_tuple(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_ttl_set(&mask.inner) || + dr_mask_is_ipv4_ihl_set(&mask.inner)) + mlx5dr_ste_build_eth_l3_ipv4_misc(ste_ctx, &sb[idx++], + &mask, inner, rx); + } + + if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, inner)) + mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner)) + mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++], + &mask, inner, rx); + + if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + } + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) { + if (dr_mask_is_flex_parser_0_3_set(&mask.misc4)) + mlx5dr_ste_build_flex_parser_0(ste_ctx, &sb[idx++], + &mask, false, rx); + + if (dr_mask_is_flex_parser_4_7_set(&mask.misc4)) + mlx5dr_ste_build_flex_parser_1(ste_ctx, &sb[idx++], + &mask, false, rx); + } + + /* Empty matcher, takes all */ + if ((!idx && allow_empty_match) || + matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) + mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx); + + if (idx == 0) { + mlx5dr_err(dmn, "Cannot generate any valid rules from mask\n"); + return -EINVAL; + } + + /* Check that all mask fields were consumed */ + for (i = 0; i < sizeof(struct mlx5dr_match_param); i++) { + if (((u8 *)&mask)[i] != 0) { + mlx5dr_dbg(dmn, "Mask contains unsupported parameters\n"); + return -EOPNOTSUPP; + } + } + + nic_matcher->ste_builder = sb; + nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv] = idx; + + return 0; +} + +static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *curr_nic_matcher, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl; + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_htbl; + int ret; + + /* Connect end anchor hash table to next_htbl or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + } + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->e_anchor, + &info, info.type == CONNECT_HIT); + if (ret) + return ret; + + /* Connect start hash table to end anchor */ + info.type = CONNECT_MISS; + info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(curr_nic_matcher->e_anchor->chunk); + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + curr_nic_matcher->s_htbl, + &info, false); + if (ret) + return ret; + + /* Connect previous hash table to matcher start hash table */ + if (prev_nic_matcher) + prev_htbl = prev_nic_matcher->e_anchor; + else + prev_htbl = nic_tbl->s_anchor; + + info.type = CONNECT_HIT; + info.hit_next_htbl = curr_nic_matcher->s_htbl; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_htbl, + &info, true); + if (ret) + return ret; + + /* Update the pointing ste and next hash table */ + curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->chunk->ste_arr; + prev_htbl->chunk->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; + + if (next_nic_matcher) { + next_nic_matcher->s_htbl->pointing_ste = + curr_nic_matcher->e_anchor->chunk->ste_arr; + curr_nic_matcher->e_anchor->chunk->ste_arr[0].next_htbl = + next_nic_matcher->s_htbl; + } + + return 0; +} + +int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_matcher_rx_tx *next_nic_matcher, *prev_nic_matcher, *tmp_nic_matcher; + struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl; + bool first = true; + int ret; + + /* If the nic matcher is already on its parent nic table list, + * then it is already connected to the chain of nic matchers. + */ + if (!list_empty(&nic_matcher->list_node)) + return 0; + + next_nic_matcher = NULL; + list_for_each_entry(tmp_nic_matcher, &nic_tbl->nic_matcher_list, list_node) { + if (tmp_nic_matcher->prio >= nic_matcher->prio) { + next_nic_matcher = tmp_nic_matcher; + break; + } + first = false; + } + + prev_nic_matcher = NULL; + if (next_nic_matcher && !first) + prev_nic_matcher = list_prev_entry(next_nic_matcher, list_node); + else if (!first) + prev_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list, + struct mlx5dr_matcher_rx_tx, + list_node); + + ret = dr_nic_matcher_connect(dmn, nic_matcher, + next_nic_matcher, prev_nic_matcher); + if (ret) + return ret; + + if (prev_nic_matcher) + list_add(&nic_matcher->list_node, &prev_nic_matcher->list_node); + else if (next_nic_matcher) + list_add_tail(&nic_matcher->list_node, &next_nic_matcher->list_node); + else + list_add(&nic_matcher->list_node, &nic_matcher->nic_tbl->nic_matcher_list); + + return ret; +} + +static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + mlx5dr_htbl_put(nic_matcher->s_htbl); + mlx5dr_htbl_put(nic_matcher->e_anchor); +} + +static void dr_matcher_uninit_fdb(struct mlx5dr_matcher *matcher) +{ + dr_matcher_uninit_nic(&matcher->rx); + dr_matcher_uninit_nic(&matcher->tx); +} + +static void dr_matcher_uninit(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_matcher_uninit_nic(&matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_matcher_uninit_nic(&matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_matcher_uninit_fdb(matcher); + break; + default: + WARN_ON(true); + break; + } +} + +static int dr_matcher_set_all_ste_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV4); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV4, DR_RULE_IPV6); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV4); + dr_matcher_set_ste_builders(matcher, nic_matcher, DR_RULE_IPV6, DR_RULE_IPV6); + + if (!nic_matcher->ste_builder) { + mlx5dr_err(dmn, "Cannot generate IPv4 or IPv6 rules with given mask\n"); + return -EINVAL; + } + + return 0; +} + +static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + int ret; + + nic_matcher->prio = matcher->prio; + INIT_LIST_HEAD(&nic_matcher->list_node); + + ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher); + if (ret) + return ret; + + nic_matcher->e_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_matcher->e_anchor) + return -ENOMEM; + + nic_matcher->s_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + nic_matcher->ste_builder[0].lu_type, + nic_matcher->ste_builder[0].byte_mask); + if (!nic_matcher->s_htbl) { + ret = -ENOMEM; + goto free_e_htbl; + } + + /* make sure the tables exist while empty */ + mlx5dr_htbl_get(nic_matcher->s_htbl); + mlx5dr_htbl_get(nic_matcher->e_anchor); + + return 0; + +free_e_htbl: + mlx5dr_ste_htbl_free(nic_matcher->e_anchor); + return ret; +} + +static int dr_matcher_init_fdb(struct mlx5dr_matcher *matcher) +{ + int ret; + + ret = dr_matcher_init_nic(matcher, &matcher->rx); + if (ret) + return ret; + + ret = dr_matcher_init_nic(matcher, &matcher->tx); + if (ret) + goto uninit_nic_rx; + + return 0; + +uninit_nic_rx: + dr_matcher_uninit_nic(&matcher->rx); + return ret; +} + +static int dr_matcher_copy_param(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_parameters consumed_mask; + int i, ret = 0; + + if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { + mlx5dr_err(dmn, "Invalid match criteria attribute\n"); + return -EINVAL; + } + + if (mask) { + if (mask->match_sz > DR_SZ_MATCH_PARAM) { + mlx5dr_err(dmn, "Invalid match size attribute\n"); + return -EINVAL; + } + + consumed_mask.match_buf = kzalloc(mask->match_sz, GFP_KERNEL); + if (!consumed_mask.match_buf) + return -ENOMEM; + + consumed_mask.match_sz = mask->match_sz; + memcpy(consumed_mask.match_buf, mask->match_buf, mask->match_sz); + mlx5dr_ste_copy_param(matcher->match_criteria, + &matcher->mask, &consumed_mask, true); + + /* Check that all mask data was consumed */ + for (i = 0; i < consumed_mask.match_sz; i++) { + if (!((u8 *)consumed_mask.match_buf)[i]) + continue; + + mlx5dr_dbg(dmn, + "Match param mask contains unsupported parameters\n"); + ret = -EOPNOTSUPP; + break; + } + + kfree(consumed_mask.match_buf); + } + + return ret; +} + +static int dr_matcher_init(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret; + + ret = dr_matcher_copy_param(matcher, mask); + if (ret) + return ret; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + matcher->rx.nic_tbl = &tbl->rx; + ret = dr_matcher_init_nic(matcher, &matcher->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_nic(matcher, &matcher->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + matcher->rx.nic_tbl = &tbl->rx; + matcher->tx.nic_tbl = &tbl->tx; + ret = dr_matcher_init_fdb(matcher); + break; + default: + WARN_ON(true); + ret = -EINVAL; + } + + return ret; +} + +static void dr_matcher_add_to_dbg_list(struct mlx5dr_matcher *matcher) +{ + mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex); + list_add(&matcher->list_node, &matcher->tbl->matcher_list); + mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex); +} + +static void dr_matcher_remove_from_dbg_list(struct mlx5dr_matcher *matcher) +{ + mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex); + list_del(&matcher->list_node); + mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex); +} + +struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *tbl, + u32 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_matcher *matcher; + int ret; + + refcount_inc(&tbl->refcount); + + matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); + if (!matcher) + goto dec_ref; + + matcher->tbl = tbl; + matcher->prio = priority; + matcher->match_criteria = match_criteria_enable; + refcount_set(&matcher->refcount, 1); + INIT_LIST_HEAD(&matcher->list_node); + INIT_LIST_HEAD(&matcher->dbg_rule_list); + + mlx5dr_domain_lock(tbl->dmn); + + ret = dr_matcher_init(matcher, mask); + if (ret) + goto free_matcher; + + dr_matcher_add_to_dbg_list(matcher); + + mlx5dr_domain_unlock(tbl->dmn); + + return matcher; + +free_matcher: + mlx5dr_domain_unlock(tbl->dmn); + kfree(matcher); +dec_ref: + refcount_dec(&tbl->refcount); + return NULL; +} + +static int dr_matcher_disconnect_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *prev_anchor; + + if (prev_nic_matcher) + prev_anchor = prev_nic_matcher->e_anchor; + else + prev_anchor = nic_tbl->s_anchor; + + /* Connect previous anchor hash table to next matcher or to the default address */ + if (next_nic_matcher) { + info.type = CONNECT_HIT; + info.hit_next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = prev_anchor->chunk->ste_arr; + prev_anchor->chunk->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + } else { + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + prev_anchor->chunk->ste_arr[0].next_htbl = NULL; + } + + return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor, + &info, true); +} + +int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_matcher_rx_tx *prev_nic_matcher, *next_nic_matcher; + struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl; + int ret; + + /* If the nic matcher is not on its parent nic table list, + * then it is detached - no need to disconnect it. + */ + if (list_empty(&nic_matcher->list_node)) + return 0; + + if (list_is_last(&nic_matcher->list_node, &nic_tbl->nic_matcher_list)) + next_nic_matcher = NULL; + else + next_nic_matcher = list_next_entry(nic_matcher, list_node); + + if (nic_matcher->list_node.prev == &nic_tbl->nic_matcher_list) + prev_nic_matcher = NULL; + else + prev_nic_matcher = list_prev_entry(nic_matcher, list_node); + + ret = dr_matcher_disconnect_nic(dmn, nic_tbl, next_nic_matcher, prev_nic_matcher); + if (ret) + return ret; + + list_del_init(&nic_matcher->list_node); + return 0; +} + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_table *tbl = matcher->tbl; + + if (WARN_ON_ONCE(refcount_read(&matcher->refcount) > 1)) + return -EBUSY; + + mlx5dr_domain_lock(tbl->dmn); + + dr_matcher_remove_from_dbg_list(matcher); + dr_matcher_uninit(matcher); + refcount_dec(&matcher->tbl->refcount); + + mlx5dr_domain_unlock(tbl->dmn); + kfree(matcher); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c new file mode 100644 index 0000000000..8ca534ef5d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "dr_types.h" +#include "mlx5_ifc_dr_ste_v1.h" + +enum dr_ptrn_modify_hdr_action_id { + DR_PTRN_MODIFY_HDR_ACTION_ID_NOP = 0x00, + DR_PTRN_MODIFY_HDR_ACTION_ID_COPY = 0x05, + DR_PTRN_MODIFY_HDR_ACTION_ID_SET = 0x06, + DR_PTRN_MODIFY_HDR_ACTION_ID_ADD = 0x07, + DR_PTRN_MODIFY_HDR_ACTION_ID_INSERT_INLINE = 0x0a, +}; + +struct mlx5dr_ptrn_mgr { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_pool *ptrn_icm_pool; + /* cache for modify_header ptrn */ + struct list_head ptrn_list; + struct mutex modify_hdr_mutex; /* protect the pattern cache */ +}; + +/* Cache structure and functions */ +static bool dr_ptrn_compare_modify_hdr(size_t cur_num_of_actions, + __be64 cur_hw_actions[], + size_t num_of_actions, + __be64 hw_actions[]) +{ + int i; + + if (cur_num_of_actions != num_of_actions) + return false; + + for (i = 0; i < num_of_actions; i++) { + u8 action_id = + MLX5_GET(ste_double_action_set_v1, &hw_actions[i], action_id); + + if (action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_COPY) { + if (hw_actions[i] != cur_hw_actions[i]) + return false; + } else { + if ((__force __be32)hw_actions[i] != + (__force __be32)cur_hw_actions[i]) + return false; + } + } + + return true; +} + +static struct mlx5dr_ptrn_obj * +dr_ptrn_find_cached_pattern(struct mlx5dr_ptrn_mgr *mgr, + size_t num_of_actions, + __be64 hw_actions[]) +{ + struct mlx5dr_ptrn_obj *cached_pattern; + struct mlx5dr_ptrn_obj *tmp; + + list_for_each_entry_safe(cached_pattern, tmp, &mgr->ptrn_list, list) { + if (dr_ptrn_compare_modify_hdr(cached_pattern->num_of_actions, + (__be64 *)cached_pattern->data, + num_of_actions, + hw_actions)) { + /* Put this pattern in the head of the list, + * as we will probably use it more. + */ + list_del_init(&cached_pattern->list); + list_add(&cached_pattern->list, &mgr->ptrn_list); + return cached_pattern; + } + } + + return NULL; +} + +static struct mlx5dr_ptrn_obj * +dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr, + u16 num_of_actions, u8 *data) +{ + struct mlx5dr_ptrn_obj *pattern; + struct mlx5dr_icm_chunk *chunk; + u32 chunk_size; + u32 index; + + chunk_size = ilog2(roundup_pow_of_two(num_of_actions)); + /* HW modify action index granularity is at least 64B */ + chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8); + + chunk = mlx5dr_icm_alloc_chunk(mgr->ptrn_icm_pool, chunk_size); + if (!chunk) + return NULL; + + index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) - + mgr->dmn->info.caps.hdr_modify_pattern_icm_addr) / + DR_ACTION_CACHE_LINE_SIZE; + + pattern = kzalloc(sizeof(*pattern), GFP_KERNEL); + if (!pattern) + goto free_chunk; + + pattern->data = kzalloc(num_of_actions * DR_MODIFY_ACTION_SIZE * + sizeof(*pattern->data), GFP_KERNEL); + if (!pattern->data) + goto free_pattern; + + memcpy(pattern->data, data, num_of_actions * DR_MODIFY_ACTION_SIZE); + pattern->chunk = chunk; + pattern->index = index; + pattern->num_of_actions = num_of_actions; + + list_add(&pattern->list, &mgr->ptrn_list); + refcount_set(&pattern->refcount, 1); + + return pattern; + +free_pattern: + kfree(pattern); +free_chunk: + mlx5dr_icm_free_chunk(chunk); + return NULL; +} + +static void +dr_ptrn_free_pattern(struct mlx5dr_ptrn_obj *pattern) +{ + list_del(&pattern->list); + mlx5dr_icm_free_chunk(pattern->chunk); + kfree(pattern->data); + kfree(pattern); +} + +struct mlx5dr_ptrn_obj * +mlx5dr_ptrn_cache_get_pattern(struct mlx5dr_ptrn_mgr *mgr, + u16 num_of_actions, + u8 *data) +{ + struct mlx5dr_ptrn_obj *pattern; + u64 *hw_actions; + u8 action_id; + int i; + + mutex_lock(&mgr->modify_hdr_mutex); + pattern = dr_ptrn_find_cached_pattern(mgr, + num_of_actions, + (__be64 *)data); + if (!pattern) { + /* Alloc and add new pattern to cache */ + pattern = dr_ptrn_alloc_pattern(mgr, num_of_actions, data); + if (!pattern) + goto out_unlock; + + hw_actions = (u64 *)pattern->data; + /* Here we mask the pattern data to create a valid pattern + * since we do an OR operation between the arg and pattern + */ + for (i = 0; i < num_of_actions; i++) { + action_id = MLX5_GET(ste_double_action_set_v1, &hw_actions[i], action_id); + + if (action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_SET || + action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_ADD || + action_id == DR_PTRN_MODIFY_HDR_ACTION_ID_INSERT_INLINE) + MLX5_SET(ste_double_action_set_v1, &hw_actions[i], inline_data, 0); + } + + if (mlx5dr_send_postsend_pattern(mgr->dmn, pattern->chunk, + num_of_actions, pattern->data)) { + refcount_dec(&pattern->refcount); + goto free_pattern; + } + } else { + refcount_inc(&pattern->refcount); + } + + mutex_unlock(&mgr->modify_hdr_mutex); + + return pattern; + +free_pattern: + dr_ptrn_free_pattern(pattern); +out_unlock: + mutex_unlock(&mgr->modify_hdr_mutex); + return NULL; +} + +void +mlx5dr_ptrn_cache_put_pattern(struct mlx5dr_ptrn_mgr *mgr, + struct mlx5dr_ptrn_obj *pattern) +{ + mutex_lock(&mgr->modify_hdr_mutex); + + if (refcount_dec_and_test(&pattern->refcount)) + dr_ptrn_free_pattern(pattern); + + mutex_unlock(&mgr->modify_hdr_mutex); +} + +struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_ptrn_mgr *mgr; + + if (!mlx5dr_domain_is_support_ptrn_arg(dmn)) + return NULL; + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return NULL; + + mgr->dmn = dmn; + mgr->ptrn_icm_pool = mlx5dr_icm_pool_create(dmn, DR_ICM_TYPE_MODIFY_HDR_PTRN); + if (!mgr->ptrn_icm_pool) { + mlx5dr_err(dmn, "Couldn't get modify-header-pattern memory\n"); + goto free_mgr; + } + + INIT_LIST_HEAD(&mgr->ptrn_list); + mutex_init(&mgr->modify_hdr_mutex); + + return mgr; + +free_mgr: + kfree(mgr); + return NULL; +} + +void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr) +{ + struct mlx5dr_ptrn_obj *pattern; + struct mlx5dr_ptrn_obj *tmp; + + if (!mgr) + return; + + WARN_ON(!list_empty(&mgr->ptrn_list)); + + list_for_each_entry_safe(pattern, tmp, &mgr->ptrn_list, list) { + list_del(&pattern->list); + kfree(pattern->data); + kfree(pattern); + } + + mlx5dr_icm_pool_destroy(mgr->ptrn_icm_pool); + mutex_destroy(&mgr->modify_hdr_mutex); + kfree(mgr); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c new file mode 100644 index 0000000000..042ca03491 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -0,0 +1,1377 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN < 2048) +/* don't try to optimize STE allocation if the stack is too constaraining */ +#define DR_RULE_MAX_STES_OPTIMIZED 0 +#else +#define DR_RULE_MAX_STES_OPTIMIZED 5 +#endif +#define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) + +static int dr_rule_append_to_miss_list(struct mlx5dr_domain *dmn, + enum mlx5dr_domain_nic_type nic_type, + struct mlx5dr_ste *new_last_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_ste_send_info *ste_info_last; + struct mlx5dr_ste *last_ste; + + /* The new entry will be inserted after the last */ + last_ste = list_last_entry(miss_list, struct mlx5dr_ste, miss_list_node); + WARN_ON(!last_ste); + + ste_info_last = mlx5dr_send_info_alloc(dmn, nic_type); + if (!ste_info_last) + return -ENOMEM; + + mlx5dr_ste_set_miss_addr(ste_ctx, mlx5dr_ste_get_hw_ste(last_ste), + mlx5dr_ste_get_icm_addr(new_last_ste)); + list_add_tail(&new_last_ste->miss_list_node, miss_list); + + mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_CTRL, + 0, mlx5dr_ste_get_hw_ste(last_ste), + ste_info_last, send_list, true); + + return 0; +} + +static void dr_rule_set_last_ste_miss_addr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste) +{ + struct mlx5dr_ste_ctx *ste_ctx = matcher->tbl->dmn->ste_ctx; + u64 icm_addr; + + if (mlx5dr_ste_is_miss_addr_set(ste_ctx, hw_ste)) + return; + + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr); +} + +static struct mlx5dr_ste * +dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *ste; + + /* Create new table for miss entry */ + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!new_htbl) { + mlx5dr_dbg(dmn, "Failed allocating collision table\n"); + return NULL; + } + + /* One and only entry, never grows */ + ste = new_htbl->chunk->ste_arr; + dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste); + mlx5dr_htbl_get(new_htbl); + + return ste; +} + +static struct mlx5dr_ste * +dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste, + struct mlx5dr_ste *orig_ste) +{ + struct mlx5dr_ste *ste; + + ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!ste) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed creating collision entry\n"); + return NULL; + } + + ste->ste_chain_location = orig_ste->ste_chain_location; + ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste; + + /* In collision entry, all members share the same miss_list_head */ + ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(orig_ste); + + /* Next table */ + if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed allocating table\n"); + goto free_tbl; + } + + return ste; + +free_tbl: + mlx5dr_ste_free(ste, matcher, nic_matcher); + return NULL; +} + +static int +dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, + struct mlx5dr_domain *dmn) +{ + int ret; + + list_del(&ste_info->send_list); + + /* Copy data to ste, only reduced size or control, the last 16B (mask) + * is already written to the hw. + */ + if (ste_info->size == DR_STE_SIZE_CTRL) + memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste), + ste_info->data, DR_STE_SIZE_CTRL); + else + memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste), + ste_info->data, DR_STE_SIZE_REDUCED); + + ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, + ste_info->size, ste_info->offset); + if (ret) + goto out; + +out: + mlx5dr_send_info_free(ste_info); + return ret; +} + +static int dr_rule_send_update_list(struct list_head *send_ste_list, + struct mlx5dr_domain *dmn, + bool is_reverse) +{ + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + int ret; + + if (is_reverse) { + list_for_each_entry_safe_reverse(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } else { + list_for_each_entry_safe(ste_info, tmp_ste_info, + send_ste_list, send_list) { + ret = dr_rule_handle_one_ste_in_update_list(ste_info, + dmn); + if (ret) + return ret; + } + } + + return 0; +} + +static struct mlx5dr_ste * +dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste) +{ + struct mlx5dr_ste *ste; + + if (list_empty(miss_list)) + return NULL; + + /* Check if hw_ste is present in the list */ + list_for_each_entry(ste, miss_list, miss_list_node) { + if (mlx5dr_ste_equal_tag(mlx5dr_ste_get_hw_ste(ste), hw_ste)) + return ste; + } + + return NULL; +} + +static struct mlx5dr_ste * +dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *update_list, + struct mlx5dr_ste *col_ste, + u8 *hw_ste) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste *new_ste; + int ret; + + new_ste = dr_rule_create_collision_htbl(matcher, nic_matcher, hw_ste); + if (!new_ste) + return NULL; + + /* Update collision pointing STE */ + new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste; + + /* In collision entry, all members share the same miss_list_head */ + new_ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(col_ste); + + /* Update the previous from the list */ + ret = dr_rule_append_to_miss_list(dmn, nic_matcher->nic_tbl->nic_dmn->type, + new_ste, mlx5dr_ste_get_miss_list(col_ste), + update_list); + if (ret) { + mlx5dr_dbg(dmn, "Failed update dup entry\n"); + goto err_exit; + } + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste *new_ste) +{ + new_ste->next_htbl = cur_ste->next_htbl; + new_ste->ste_chain_location = cur_ste->ste_chain_location; + + if (new_ste->next_htbl) + new_ste->next_htbl->pointing_ste = new_ste; + + /* We need to copy the refcount since this ste + * may have been traversed several times + */ + new_ste->refcount = cur_ste->refcount; + + /* Link old STEs rule to the new ste */ + mlx5dr_rule_set_last_member(cur_ste->rule_rx_tx, new_ste, false); +} + +static struct mlx5dr_ste * +dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *cur_ste, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_send_info *ste_info; + bool use_update_list = false; + u8 hw_ste[DR_STE_SIZE] = {}; + struct mlx5dr_ste *new_ste; + int new_idx; + u8 sb_idx; + + /* Copy STE mask from the matcher */ + sb_idx = cur_ste->ste_chain_location - 1; + mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); + + /* Copy STE control and tag */ + memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED); + dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste); + + new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); + new_ste = &new_htbl->chunk->ste_arr[new_idx]; + + if (mlx5dr_ste_is_not_used(new_ste)) { + mlx5dr_htbl_get(new_htbl); + list_add_tail(&new_ste->miss_list_node, + mlx5dr_ste_get_miss_list(new_ste)); + } else { + new_ste = dr_rule_rehash_handle_collision(matcher, + nic_matcher, + update_list, + new_ste, + hw_ste); + if (!new_ste) { + mlx5dr_dbg(dmn, "Failed adding collision entry, index: %d\n", + new_idx); + return NULL; + } + new_htbl->ctrl.num_of_collisions++; + use_update_list = true; + } + + memcpy(mlx5dr_ste_get_hw_ste(new_ste), hw_ste, DR_STE_SIZE_REDUCED); + + new_htbl->ctrl.num_of_valid_entries++; + + if (use_update_list) { + ste_info = mlx5dr_send_info_alloc(dmn, + nic_matcher->nic_tbl->nic_dmn->type); + if (!ste_info) + goto err_exit; + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, + hw_ste, ste_info, + update_list, true); + } + + dr_rule_rehash_copy_ste_ctrl(matcher, nic_matcher, cur_ste, new_ste); + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); + return NULL; +} + +static int dr_rule_rehash_copy_miss_list(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct list_head *cur_miss_list, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *tmp_ste, *cur_ste, *new_ste; + + if (list_empty(cur_miss_list)) + return 0; + + list_for_each_entry_safe(cur_ste, tmp_ste, cur_miss_list, miss_list_node) { + new_ste = dr_rule_rehash_copy_ste(matcher, + nic_matcher, + cur_ste, + new_htbl, + update_list); + if (!new_ste) + goto err_insert; + + list_del(&cur_ste->miss_list_node); + mlx5dr_htbl_put(cur_ste->htbl); + } + return 0; + +err_insert: + mlx5dr_err(matcher->tbl->dmn, "Fatal error during resize\n"); + WARN_ON(true); + return -EINVAL; +} + +static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste_htbl *new_htbl, + struct list_head *update_list) +{ + struct mlx5dr_ste *cur_ste; + int cur_entries; + int err = 0; + int i; + + cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk->size); + + if (cur_entries < 1) { + mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n"); + return -EINVAL; + } + + for (i = 0; i < cur_entries; i++) { + cur_ste = &cur_htbl->chunk->ste_arr[i]; + if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */ + continue; + + err = dr_rule_rehash_copy_miss_list(matcher, + nic_matcher, + mlx5dr_ste_get_miss_list(cur_ste), + new_htbl, + update_list); + if (err) + goto clean_copy; + + /* In order to decrease the number of allocated ste_send_info + * structs, send the current table row now. + */ + err = dr_rule_send_update_list(update_list, matcher->tbl->dmn, false); + if (err) { + mlx5dr_dbg(matcher->tbl->dmn, "Failed updating table to HW\n"); + goto clean_copy; + } + } + +clean_copy: + return err; +} + +static struct mlx5dr_ste_htbl * +dr_rule_rehash_htbl(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list, + enum mlx5dr_icm_chunk_size new_size) +{ + struct mlx5dr_ste_send_info *del_ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_domain_rx_tx *nic_dmn; + u8 formatted_ste[DR_STE_SIZE] = {}; + LIST_HEAD(rehash_table_send_list); + struct mlx5dr_ste *ste_to_update; + struct mlx5dr_ste_htbl *new_htbl; + int err; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + ste_info = mlx5dr_send_info_alloc(dmn, + nic_matcher->nic_tbl->nic_dmn->type); + if (!ste_info) + return NULL; + + new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + new_size, + cur_htbl->lu_type, + cur_htbl->byte_mask); + if (!new_htbl) { + mlx5dr_err(dmn, "Failed to allocate new hash table\n"); + goto free_ste_info; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_formatted_ste(dmn->ste_ctx, + dmn->info.caps.gvmi, + nic_dmn->type, + new_htbl, + formatted_ste, + &info); + + new_htbl->pointing_ste = cur_htbl->pointing_ste; + new_htbl->pointing_ste->next_htbl = new_htbl; + err = dr_rule_rehash_copy_htbl(matcher, + nic_matcher, + cur_htbl, + new_htbl, + &rehash_table_send_list); + if (err) + goto free_new_htbl; + + if (mlx5dr_send_postsend_htbl(dmn, new_htbl, formatted_ste, + nic_matcher->ste_builder[ste_location - 1].bit_mask)) { + mlx5dr_err(dmn, "Failed writing table to HW\n"); + goto free_new_htbl; + } + + /* Writing to the hw is done in regular order of rehash_table_send_list, + * in order to have the origin data written before the miss address of + * collision entries, if exists. + */ + if (dr_rule_send_update_list(&rehash_table_send_list, dmn, false)) { + mlx5dr_err(dmn, "Failed updating table to HW\n"); + goto free_ste_list; + } + + /* Connect previous hash table to current */ + if (ste_location == 1) { + /* The previous table is an anchor, anchors size is always one STE */ + struct mlx5dr_ste_htbl *prev_htbl = cur_htbl->pointing_ste->htbl; + + /* On matcher s_anchor we keep an extra refcount */ + mlx5dr_htbl_get(new_htbl); + mlx5dr_htbl_put(cur_htbl); + + nic_matcher->s_htbl = new_htbl; + + /* It is safe to operate dr_ste_set_hit_addr on the hw_ste here + * (48B len) which works only on first 32B + */ + mlx5dr_ste_set_hit_addr(dmn->ste_ctx, + prev_htbl->chunk->hw_ste_arr, + mlx5dr_icm_pool_get_chunk_icm_addr(new_htbl->chunk), + mlx5dr_icm_pool_get_chunk_num_of_entries(new_htbl->chunk)); + + ste_to_update = &prev_htbl->chunk->ste_arr[0]; + } else { + mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx, + mlx5dr_ste_get_hw_ste(cur_htbl->pointing_ste), + new_htbl); + ste_to_update = cur_htbl->pointing_ste; + } + + mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_CTRL, + 0, mlx5dr_ste_get_hw_ste(ste_to_update), + ste_info, update_list, false); + + return new_htbl; + +free_ste_list: + /* Clean all ste_info's from the new table */ + list_for_each_entry_safe(del_ste_info, tmp_ste_info, + &rehash_table_send_list, send_list) { + list_del(&del_ste_info->send_list); + mlx5dr_send_info_free(del_ste_info); + } + +free_new_htbl: + mlx5dr_ste_htbl_free(new_htbl); +free_ste_info: + mlx5dr_send_info_free(ste_info); + mlx5dr_info(dmn, "Failed creating rehash table\n"); + return NULL; +} + +static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste_htbl *cur_htbl, + u8 ste_location, + struct list_head *update_list) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + enum mlx5dr_icm_chunk_size new_size; + + new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk->size); + new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz); + + if (new_size == cur_htbl->chunk->size) + return NULL; /* Skip rehash, we already at the max size */ + + return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location, + update_list, new_size); +} + +static struct mlx5dr_ste * +dr_rule_handle_collision(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_send_info *ste_info; + struct mlx5dr_ste *new_ste; + + ste_info = mlx5dr_send_info_alloc(dmn, + nic_matcher->nic_tbl->nic_dmn->type); + if (!ste_info) + return NULL; + + new_ste = dr_rule_create_collision_entry(matcher, nic_matcher, hw_ste, ste); + if (!new_ste) + goto free_send_info; + + if (dr_rule_append_to_miss_list(dmn, nic_matcher->nic_tbl->nic_dmn->type, + new_ste, miss_list, send_list)) { + mlx5dr_dbg(dmn, "Failed to update prev miss_list\n"); + goto err_exit; + } + + mlx5dr_send_fill_and_append_ste_send_info(new_ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + ste->htbl->ctrl.num_of_collisions++; + ste->htbl->ctrl.num_of_valid_entries++; + + return new_ste; + +err_exit: + mlx5dr_ste_free(new_ste, matcher, nic_matcher); +free_send_info: + mlx5dr_send_info_free(ste_info); + return NULL; +} + +static void dr_rule_remove_action_members(struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + struct mlx5dr_rule_action_member *tmp; + + list_for_each_entry_safe(action_mem, tmp, &rule->rule_actions_list, list) { + list_del(&action_mem->list); + refcount_dec(&action_mem->action->refcount); + kvfree(action_mem); + } +} + +static int dr_rule_add_action_members(struct mlx5dr_rule *rule, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_rule_action_member *action_mem; + int i; + + for (i = 0; i < num_actions; i++) { + action_mem = kvzalloc(sizeof(*action_mem), GFP_KERNEL); + if (!action_mem) + goto free_action_members; + + action_mem->action = actions[i]; + INIT_LIST_HEAD(&action_mem->list); + list_add_tail(&action_mem->list, &rule->rule_actions_list); + refcount_inc(&action_mem->action->refcount); + } + + return 0; + +free_action_members: + dr_rule_remove_action_members(rule); + return -ENOMEM; +} + +void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste *ste, + bool force) +{ + /* Update rule member is usually done for the last STE or during rule + * creation to recover from mid-creation failure (for this peruse the + * force flag is used) + */ + if (ste->next_htbl && !force) + return; + + /* Update is required since each rule keeps track of its last STE */ + ste->rule_rx_tx = nic_rule; + nic_rule->last_rule_ste = ste; +} + +static struct mlx5dr_ste *dr_rule_get_pointed_ste(struct mlx5dr_ste *curr_ste) +{ + struct mlx5dr_ste *first_ste; + + first_ste = list_first_entry(mlx5dr_ste_get_miss_list(curr_ste), + struct mlx5dr_ste, miss_list_node); + + return first_ste->htbl->pointing_ste; +} + +int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr, + struct mlx5dr_ste *curr_ste, + int *num_of_stes) +{ + bool first = false; + + *num_of_stes = 0; + + if (!curr_ste) + return -ENOENT; + + /* Iterate from last to first */ + while (!first) { + first = curr_ste->ste_chain_location == 1; + ste_arr[*num_of_stes] = curr_ste; + *num_of_stes += 1; + curr_ste = dr_rule_get_pointed_ste(curr_ste); + } + + return 0; +} + +static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES]; + struct mlx5dr_ste *curr_ste = nic_rule->last_rule_ste; + int i; + + if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i)) + return; + + while (i--) + mlx5dr_ste_put(ste_arr[i], rule->matcher, nic_rule->nic_matcher); +} + +static u16 dr_get_bits_per_mask(u16 byte_mask) +{ + u16 bits = 0; + + while (byte_mask) { + byte_mask = byte_mask & (byte_mask - 1); + bits++; + } + + return bits; +} + +static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn) +{ + struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; + int threshold; + + if (dmn->info.max_log_sw_icm_sz <= htbl->chunk->size) + return false; + + if (!mlx5dr_ste_htbl_may_grow(htbl)) + return false; + + if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk->size) + return false; + + threshold = mlx5dr_ste_htbl_increase_threshold(htbl); + if (ctrl->num_of_collisions >= threshold && + (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= threshold) + return true; + + return false; +} + +static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste *last_ste, + u8 *hw_ste_arr, + u32 new_hw_ste_arr_sz) +{ + struct mlx5dr_matcher_rx_tx *nic_matcher = nic_rule->nic_matcher; + struct mlx5dr_ste_send_info *ste_info_arr[DR_ACTION_MAX_STES]; + u8 num_of_builders = nic_matcher->num_of_builders; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + u8 *curr_hw_ste, *prev_hw_ste; + struct mlx5dr_ste *action_ste; + int i, k; + + /* Two cases: + * 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste + * 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added + * to support the action. + */ + + for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) { + curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE; + prev_hw_ste = (i == 0) ? curr_hw_ste : hw_ste_arr + ((i - 1) * DR_STE_SIZE); + action_ste = dr_rule_create_collision_htbl(matcher, + nic_matcher, + curr_hw_ste); + if (!action_ste) + return -ENOMEM; + + mlx5dr_ste_get(action_ste); + + action_ste->htbl->pointing_ste = last_ste; + last_ste->next_htbl = action_ste->htbl; + last_ste = action_ste; + + /* While free ste we go over the miss list, so add this ste to the list */ + list_add_tail(&action_ste->miss_list_node, + mlx5dr_ste_get_miss_list(action_ste)); + + ste_info_arr[k] = mlx5dr_send_info_alloc(dmn, + nic_matcher->nic_tbl->nic_dmn->type); + if (!ste_info_arr[k]) + goto err_exit; + + /* Point current ste to the new action */ + mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx, + prev_hw_ste, + action_ste->htbl); + + mlx5dr_rule_set_last_member(nic_rule, action_ste, true); + + mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0, + curr_hw_ste, + ste_info_arr[k], + send_ste_list, false); + } + + last_ste->next_htbl = NULL; + + return 0; + +err_exit: + mlx5dr_ste_put(action_ste, matcher, nic_matcher); + return -ENOMEM; +} + +static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_htbl *cur_htbl, + struct mlx5dr_ste *ste, + u8 ste_location, + u8 *hw_ste, + struct list_head *miss_list, + struct list_head *send_list) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_send_info *ste_info; + + /* Take ref on table, only on first time this ste is used */ + mlx5dr_htbl_get(cur_htbl); + + /* new entry -> new branch */ + list_add_tail(&ste->miss_list_node, miss_list); + + dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste); + + ste->ste_chain_location = ste_location; + + ste_info = mlx5dr_send_info_alloc(dmn, + nic_matcher->nic_tbl->nic_dmn->type); + if (!ste_info) + goto clean_ste_setting; + + if (mlx5dr_ste_create_next_htbl(matcher, + nic_matcher, + ste, + hw_ste, + DR_CHUNK_SIZE_1)) { + mlx5dr_dbg(dmn, "Failed allocating table\n"); + goto clean_ste_info; + } + + cur_htbl->ctrl.num_of_valid_entries++; + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, 0, hw_ste, + ste_info, send_list, false); + + return 0; + +clean_ste_info: + mlx5dr_send_info_free(ste_info); +clean_ste_setting: + list_del_init(&ste->miss_list_node); + mlx5dr_htbl_put(cur_htbl); + + return -ENOMEM; +} + +static struct mlx5dr_ste * +dr_rule_handle_ste_branch(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *cur_htbl, + u8 *hw_ste, + u8 ste_location, + struct mlx5dr_ste_htbl **put_htbl) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *new_htbl; + struct mlx5dr_ste *matched_ste; + struct list_head *miss_list; + bool skip_rehash = false; + struct mlx5dr_ste *ste; + int index; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + +again: + index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl); + miss_list = &cur_htbl->chunk->miss_list[index]; + ste = &cur_htbl->chunk->ste_arr[index]; + + if (mlx5dr_ste_is_not_used(ste)) { + if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, + ste, ste_location, + hw_ste, miss_list, + send_ste_list)) + return NULL; + } else { + /* Hash table index in use, check if this ste is in the miss list */ + matched_ste = dr_rule_find_ste_in_miss_list(miss_list, hw_ste); + if (matched_ste) { + /* If it is last STE in the chain, and has the same tag + * it means that all the previous stes are the same, + * if so, this rule is duplicated. + */ + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location)) + return matched_ste; + + mlx5dr_dbg(dmn, "Duplicate rule inserted\n"); + } + + if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) { + /* Hash table index in use, try to resize of the hash */ + skip_rehash = true; + + /* Hold the table till we update. + * Release in dr_rule_create_rule() + */ + *put_htbl = cur_htbl; + mlx5dr_htbl_get(cur_htbl); + + new_htbl = dr_rule_rehash(rule, nic_rule, cur_htbl, + ste_location, send_ste_list); + if (!new_htbl) { + mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n", + cur_htbl->chunk->size); + mlx5dr_htbl_put(cur_htbl); + } else { + cur_htbl = new_htbl; + } + goto again; + } else { + /* Hash table index in use, add another collision (miss) */ + ste = dr_rule_handle_collision(matcher, + nic_matcher, + ste, + hw_ste, + miss_list, + send_ste_list); + if (!ste) { + mlx5dr_dbg(dmn, "failed adding collision entry, index: %d\n", + index); + return NULL; + } + } + } + return ste; +} + +static bool dr_rule_cmp_value_to_mask(u8 *mask, u8 *value, + u32 s_idx, u32 e_idx) +{ + u32 i; + + for (i = s_idx; i < e_idx; i++) { + if (value[i] & ~mask[i]) { + pr_info("Rule parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static bool dr_rule_verify(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + struct mlx5dr_match_param *param) +{ + u8 match_criteria = matcher->match_criteria; + size_t value_size = value->match_sz; + u8 *mask_p = (u8 *)&matcher->mask; + u8 *param_p = (u8 *)param; + u32 s_idx, e_idx; + + if (!value_size || + (value_size > DR_SZ_MATCH_PARAM || (value_size % sizeof(u32)))) { + mlx5dr_err(matcher->tbl->dmn, "Rule parameters length is incorrect\n"); + return false; + } + + mlx5dr_ste_copy_param(matcher->match_criteria, param, value, false); + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + s_idx = offsetof(struct mlx5dr_match_param, outer); + e_idx = min(s_idx + sizeof(param->outer), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule outer parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + s_idx = offsetof(struct mlx5dr_match_param, misc); + e_idx = min(s_idx + sizeof(param->misc), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + s_idx = offsetof(struct mlx5dr_match_param, inner); + e_idx = min(s_idx + sizeof(param->inner), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule inner parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + s_idx = offsetof(struct mlx5dr_match_param, misc2); + e_idx = min(s_idx + sizeof(param->misc2), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc2 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + s_idx = offsetof(struct mlx5dr_match_param, misc3); + e_idx = min(s_idx + sizeof(param->misc3), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc3 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC4) { + s_idx = offsetof(struct mlx5dr_match_param, misc4); + e_idx = min(s_idx + sizeof(param->misc4), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, + "Rule misc4 parameters contains a value not specified by mask\n"); + return false; + } + } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC5) { + s_idx = offsetof(struct mlx5dr_match_param, misc5); + e_idx = min(s_idx + sizeof(param->misc5), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc5 parameters contains a value not specified by mask\n"); + return false; + } + } + return true; +} + +static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule) +{ + /* Check if this nic rule was actually created, or was it skipped + * and only the other type of the RX/TX nic rule was created. + */ + if (!nic_rule->last_rule_ste) + return 0; + + mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn); + dr_rule_clean_rule_members(rule, nic_rule); + + nic_rule->nic_matcher->rules--; + if (!nic_rule->nic_matcher->rules) + mlx5dr_matcher_remove_from_tbl_nic(rule->matcher->tbl->dmn, + nic_rule->nic_matcher); + + mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn); + + return 0; +} + +static int dr_rule_destroy_rule_fdb(struct mlx5dr_rule *rule) +{ + dr_rule_destroy_rule_nic(rule, &rule->rx); + dr_rule_destroy_rule_nic(rule, &rule->tx); + return 0; +} + +static int dr_rule_destroy_rule(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mlx5dr_dbg_rule_del(rule); + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_rule_destroy_rule_nic(rule, &rule->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_rule_destroy_rule_nic(rule, &rule->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_rule_destroy_rule_fdb(rule); + break; + default: + return -EINVAL; + } + + dr_rule_remove_action_members(rule); + kfree(rule); + return 0; +} + +static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec) +{ + if (spec->ip_version == 6 || spec->ethertype == ETH_P_IPV6) + return DR_RULE_IPV6; + + return DR_RULE_IPV4; +} + +static bool dr_rule_skip(enum mlx5dr_domain_type domain, + enum mlx5dr_domain_nic_type nic_type, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value, + u32 flow_source) +{ + bool rx = nic_type == DR_DOMAIN_NIC_TYPE_RX; + + if (domain != MLX5DR_DOMAIN_TYPE_FDB) + return false; + + if (mask->misc.source_port) { + if (rx && value->misc.source_port != MLX5_VPORT_UPLINK) + return true; + + if (!rx && value->misc.source_port == MLX5_VPORT_UPLINK) + return true; + } + + if (rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT) + return true; + + if (!rx && flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) + return true; + + return false; +} + +static int +dr_rule_create_rule_nic(struct mlx5dr_rule *rule, + struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + u8 hw_ste_arr_optimized[DR_RULE_MAX_STE_CHAIN_OPTIMIZED * DR_STE_SIZE] = {}; + struct mlx5dr_ste_send_info *ste_info, *tmp_ste_info; + struct mlx5dr_matcher *matcher = rule->matcher; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_domain_rx_tx *nic_dmn; + struct mlx5dr_ste_htbl *htbl = NULL; + struct mlx5dr_ste_htbl *cur_htbl; + struct mlx5dr_ste *ste = NULL; + LIST_HEAD(send_ste_list); + bool hw_ste_arr_is_opt; + u8 *hw_ste_arr = NULL; + u32 new_hw_ste_arr_sz; + int ret, i; + + nic_matcher = nic_rule->nic_matcher; + nic_dmn = nic_matcher->nic_tbl->nic_dmn; + + if (dr_rule_skip(dmn->type, nic_dmn->type, &matcher->mask, param, + rule->flow_source)) + return 0; + + mlx5dr_domain_nic_lock(nic_dmn); + + ret = mlx5dr_matcher_select_builders(matcher, + nic_matcher, + dr_rule_get_ipv(¶m->outer), + dr_rule_get_ipv(¶m->inner)); + if (ret) + goto err_unlock; + + hw_ste_arr_is_opt = nic_matcher->num_of_builders <= DR_RULE_MAX_STES_OPTIMIZED; + if (likely(hw_ste_arr_is_opt)) { + hw_ste_arr = hw_ste_arr_optimized; + } else { + hw_ste_arr = kzalloc((nic_matcher->num_of_builders + DR_ACTION_MAX_STES) * + DR_STE_SIZE, GFP_KERNEL); + + if (!hw_ste_arr) { + ret = -ENOMEM; + goto err_unlock; + } + } + + ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher); + if (ret) + goto free_hw_ste; + + /* Set the tag values inside the ste array */ + ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr); + if (ret) + goto remove_from_nic_tbl; + + /* Set the actions values/addresses inside the ste array */ + ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions, + num_actions, hw_ste_arr, + &new_hw_ste_arr_sz); + if (ret) + goto remove_from_nic_tbl; + + cur_htbl = nic_matcher->s_htbl; + + /* Go over the array of STEs, and build dr_ste accordingly. + * The loop is over only the builders which are equal or less to the + * number of stes, in case we have actions that lives in other stes. + */ + for (i = 0; i < nic_matcher->num_of_builders; i++) { + /* Calculate CRC and keep new ste entry */ + u8 *cur_hw_ste_ent = hw_ste_arr + (i * DR_STE_SIZE); + + ste = dr_rule_handle_ste_branch(rule, + nic_rule, + &send_ste_list, + cur_htbl, + cur_hw_ste_ent, + i + 1, + &htbl); + if (!ste) { + mlx5dr_err(dmn, "Failed creating next branch\n"); + ret = -ENOENT; + goto free_rule; + } + + cur_htbl = ste->next_htbl; + + mlx5dr_ste_get(ste); + mlx5dr_rule_set_last_member(nic_rule, ste, true); + } + + /* Connect actions */ + ret = dr_rule_handle_action_stes(rule, nic_rule, &send_ste_list, + ste, hw_ste_arr, new_hw_ste_arr_sz); + if (ret) { + mlx5dr_dbg(dmn, "Failed apply actions\n"); + goto free_rule; + } + ret = dr_rule_send_update_list(&send_ste_list, dmn, true); + if (ret) { + mlx5dr_err(dmn, "Failed sending ste!\n"); + goto free_rule; + } + + if (htbl) + mlx5dr_htbl_put(htbl); + + nic_matcher->rules++; + + mlx5dr_domain_nic_unlock(nic_dmn); + + if (unlikely(!hw_ste_arr_is_opt)) + kfree(hw_ste_arr); + + return 0; + +free_rule: + dr_rule_clean_rule_members(rule, nic_rule); + /* Clean all ste_info's */ + list_for_each_entry_safe(ste_info, tmp_ste_info, &send_ste_list, send_list) { + list_del(&ste_info->send_list); + mlx5dr_send_info_free(ste_info); + } + +remove_from_nic_tbl: + if (!nic_matcher->rules) + mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher); + +free_hw_ste: + if (!hw_ste_arr_is_opt) + kfree(hw_ste_arr); + +err_unlock: + mlx5dr_domain_nic_unlock(nic_dmn); + + return ret; +} + +static int +dr_rule_create_rule_fdb(struct mlx5dr_rule *rule, + struct mlx5dr_match_param *param, + size_t num_actions, + struct mlx5dr_action *actions[]) +{ + struct mlx5dr_match_param copy_param = {}; + int ret; + + /* Copy match_param since they will be consumed during the first + * nic_rule insertion. + */ + memcpy(©_param, param, sizeof(struct mlx5dr_match_param)); + + ret = dr_rule_create_rule_nic(rule, &rule->rx, param, + num_actions, actions); + if (ret) + return ret; + + ret = dr_rule_create_rule_nic(rule, &rule->tx, ©_param, + num_actions, actions); + if (ret) + goto destroy_rule_nic_rx; + + return 0; + +destroy_rule_nic_rx: + dr_rule_destroy_rule_nic(rule, &rule->rx); + return ret; +} + +static struct mlx5dr_rule * +dr_rule_create_rule(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[], + u32 flow_source) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_match_param param = {}; + struct mlx5dr_rule *rule; + int ret; + + if (!dr_rule_verify(matcher, value, ¶m)) + return NULL; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->matcher = matcher; + rule->flow_source = flow_source; + INIT_LIST_HEAD(&rule->rule_actions_list); + + ret = dr_rule_add_action_members(rule, num_actions, actions); + if (ret) + goto free_rule; + + switch (dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + rule->rx.nic_matcher = &matcher->rx; + ret = dr_rule_create_rule_nic(rule, &rule->rx, ¶m, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_nic(rule, &rule->tx, ¶m, + num_actions, actions); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + rule->rx.nic_matcher = &matcher->rx; + rule->tx.nic_matcher = &matcher->tx; + ret = dr_rule_create_rule_fdb(rule, ¶m, + num_actions, actions); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + goto remove_action_members; + + INIT_LIST_HEAD(&rule->dbg_node); + mlx5dr_dbg_rule_add(rule); + return rule; + +remove_action_members: + dr_rule_remove_action_members(rule); +free_rule: + kfree(rule); + mlx5dr_err(dmn, "Failed creating rule\n"); + return NULL; +} + +struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[], + u32 flow_source) +{ + struct mlx5dr_rule *rule; + + refcount_inc(&matcher->refcount); + + rule = dr_rule_create_rule(matcher, value, num_actions, actions, flow_source); + if (!rule) + refcount_dec(&matcher->refcount); + + return rule; +} + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule) +{ + struct mlx5dr_matcher *matcher = rule->matcher; + int ret; + + ret = dr_rule_destroy_rule(rule); + if (!ret) + refcount_dec(&matcher->refcount); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c new file mode 100644 index 0000000000..6fa06ba2d3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -0,0 +1,1368 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include "dr_types.h" + +#define QUEUE_SIZE 128 +#define SIGNAL_PER_DIV_QUEUE 16 +#define TH_NUMS_TO_DRAIN 2 +#define DR_SEND_INFO_POOL_SIZE 1000 + +enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; + +struct dr_data_seg { + u64 addr; + u32 length; + u32 lkey; + unsigned int send_flags; +}; + +enum send_info_type { + WRITE_ICM = 0, + GTA_ARG = 1, +}; + +struct postsend_info { + enum send_info_type type; + struct dr_data_seg write; + struct dr_data_seg read; + u64 remote_addr; + u32 rkey; +}; + +struct dr_qp_rtr_attr { + struct mlx5dr_cmd_gid_attr dgid_attr; + enum ib_mtu mtu; + u32 qp_num; + u16 port_num; + u8 min_rnr_timer; + u8 sgid_index; + u16 udp_src_port; + u8 fl:1; +}; + +struct dr_qp_rts_attr { + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; +}; + +struct dr_qp_init_attr { + u32 cqn; + u32 pdn; + u32 max_send_wr; + struct mlx5_uars_page *uar; + u8 isolate_vl_tc:1; +}; + +struct mlx5dr_send_info_pool_obj { + struct mlx5dr_ste_send_info ste_send_info; + struct mlx5dr_send_info_pool *pool; + struct list_head list_node; +}; + +struct mlx5dr_send_info_pool { + struct list_head free_list; +}; + +static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool) +{ + struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj; + int i; + + for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) { + pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL); + if (!pool_obj) + goto clean_pool; + + pool_obj->pool = pool; + list_add_tail(&pool_obj->list_node, &pool->free_list); + } + + return 0; + +clean_pool: + list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) { + list_del(&pool_obj->list_node); + kfree(pool_obj); + } + + return -ENOMEM; +} + +static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool) +{ + struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj; + + list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) { + list_del(&pool_obj->list_node); + kfree(pool_obj); + } + + kfree(pool); +} + +void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn) +{ + dr_send_info_pool_destroy(dmn->send_info_pool_tx); + dr_send_info_pool_destroy(dmn->send_info_pool_rx); +} + +static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void) +{ + struct mlx5dr_send_info_pool *pool; + int ret; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + INIT_LIST_HEAD(&pool->free_list); + + ret = dr_send_info_pool_fill(pool); + if (ret) { + kfree(pool); + return NULL; + } + + return pool; +} + +int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn) +{ + dmn->send_info_pool_rx = dr_send_info_pool_create(); + if (!dmn->send_info_pool_rx) + return -ENOMEM; + + dmn->send_info_pool_tx = dr_send_info_pool_create(); + if (!dmn->send_info_pool_tx) { + dr_send_info_pool_destroy(dmn->send_info_pool_rx); + return -ENOMEM; + } + + return 0; +} + +struct mlx5dr_ste_send_info +*mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn, + enum mlx5dr_domain_nic_type nic_type) +{ + struct mlx5dr_send_info_pool_obj *pool_obj; + struct mlx5dr_send_info_pool *pool; + int ret; + + pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx : + dmn->send_info_pool_tx; + + if (unlikely(list_empty(&pool->free_list))) { + ret = dr_send_info_pool_fill(pool); + if (ret) + return NULL; + } + + pool_obj = list_first_entry_or_null(&pool->free_list, + struct mlx5dr_send_info_pool_obj, + list_node); + + if (likely(pool_obj)) { + list_del_init(&pool_obj->list_node); + } else { + WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool"); + return NULL; + } + + return &pool_obj->ste_send_info; +} + +void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info) +{ + struct mlx5dr_send_info_pool_obj *pool_obj; + + pool_obj = container_of(ste_send_info, + struct mlx5dr_send_info_pool_obj, + ste_send_info); + + list_add(&pool_obj->list_node, &pool_obj->pool->free_list); +} + +static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) +{ + unsigned int idx; + u8 opcode; + + opcode = get_cqe_opcode(cqe64); + if (opcode == MLX5_CQE_REQ_ERR) { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + } else if (opcode == MLX5_CQE_RESP_ERR) { + ++dr_cq->qp->sq.cc; + } else { + idx = be16_to_cpu(cqe64->wqe_counter) & + (dr_cq->qp->sq.wqe_cnt - 1); + dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; + + return CQ_OK; + } + + return CQ_POLL_ERR; +} + +static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq) +{ + struct mlx5_cqe64 *cqe64; + int err; + + cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq); + if (!cqe64) { + if (unlikely(dr_cq->mdev->state == + MLX5_DEVICE_STATE_INTERNAL_ERROR)) { + mlx5_core_dbg_once(dr_cq->mdev, + "Polling CQ while device is shutting down\n"); + return CQ_POLL_ERR; + } + return CQ_EMPTY; + } + + mlx5_cqwq_pop(&dr_cq->wq); + err = dr_parse_cqe(dr_cq, cqe64); + mlx5_cqwq_update_db_record(&dr_cq->wq); + + return err; +} + +static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) +{ + int npolled; + int err = 0; + + for (npolled = 0; npolled < ne; ++npolled) { + err = dr_cq_poll_one(dr_cq); + if (err != CQ_OK) + break; + } + + return err == CQ_POLL_ERR ? err : npolled; +} + +static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, + struct dr_qp_init_attr *attr) +{ + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; + struct mlx5_wq_param wqp; + struct mlx5dr_qp *dr_qp; + int inlen; + void *qpc; + void *in; + int err; + + dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL); + if (!dr_qp) + return NULL; + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + dr_qp->rq.pc = 0; + dr_qp->rq.cc = 0; + dr_qp->rq.wqe_cnt = 256; + dr_qp->sq.pc = 0; + dr_qp->sq.cc = 0; + dr_qp->sq.head = 0; + dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr); + + MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, + &dr_qp->wq_ctrl); + if (err) { + mlx5_core_warn(mdev, "Can't create QP WQ\n"); + goto err_wq; + } + + dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt, + sizeof(dr_qp->sq.wqe_head[0]), + GFP_KERNEL); + + if (!dr_qp->sq.wqe_head) { + mlx5_core_warn(mdev, "Can't allocate wqe head\n"); + goto err_wqe_head; + } + + inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * + dr_qp->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_in; + } + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc); + MLX5_SET(qpc, qpc, pd, attr->pdn); + MLX5_SET(qpc, qpc, uar_page, attr->uar->index); + MLX5_SET(qpc, qpc, log_page_size, + dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); + MLX5_SET(qpc, qpc, cqn_snd, attr->cqn); + MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn); + MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev)); + MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma); + if (MLX5_CAP_GEN(mdev, cqe_version) == 1) + MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); + mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + in, pas)); + + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn); + kvfree(in); + if (err) + goto err_in; + dr_qp->uar = attr->uar; + + return dr_qp; + +err_in: + kfree(dr_qp->sq.wqe_head); +err_wqe_head: + mlx5_wq_destroy(&dr_qp->wq_ctrl); +err_wq: + kfree(dr_qp); + return NULL; +} + +static void dr_destroy_qp(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp) +{ + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn); + mlx5_cmd_exec_in(mdev, destroy_qp, in); + + kfree(dr_qp->sq.wqe_head); + mlx5_wq_destroy(&dr_qp->wq_ctrl); + kfree(dr_qp); +} + +static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) +{ + dma_wmb(); + *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff); + + /* After wmb() the hw aware of new work */ + wmb(); + + mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET); +} + +static void +dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, + u32 remote_addr, + struct dr_data_seg *data_seg, + int *size) +{ + struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg; + struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg; + + wq_ctrl->general_id = cpu_to_be32(remote_addr); + wq_flow_seg = (void *)(wq_ctrl + 1); + + /* mlx5_wqe_flow_update_ctrl_seg - all reserved */ + memset(wq_flow_seg, 0, sizeof(*wq_flow_seg)); + wq_arg_seg = (void *)(wq_flow_seg + 1); + + memcpy(wq_arg_seg->argument_list, + (void *)(uintptr_t)data_seg->addr, + data_seg->length); + + *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ + sizeof(*wq_flow_seg) + /* WQE flow update ctrl seg - reserved */ + sizeof(*wq_arg_seg)) / /* WQE hdr modify arg seg - data */ + MLX5_SEND_WQE_DS; +} + +static void +dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, + u64 remote_addr, + u32 rkey, + struct dr_data_seg *data_seg, + unsigned int *size) +{ + struct mlx5_wqe_raddr_seg *wq_raddr; + struct mlx5_wqe_data_seg *wq_dseg; + + wq_raddr = (void *)(wq_ctrl + 1); + + wq_raddr->raddr = cpu_to_be64(remote_addr); + wq_raddr->rkey = cpu_to_be32(rkey); + wq_raddr->reserved = 0; + + wq_dseg = (void *)(wq_raddr + 1); + + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + + *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ + sizeof(*wq_dseg) + /* WQE data segment */ + sizeof(*wq_raddr)) / /* WQE remote addr segment */ + MLX5_SEND_WQE_DS; +} + +static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl, + struct dr_data_seg *data_seg) +{ + wq_ctrl->signature = 0; + wq_ctrl->rsvd[0] = 0; + wq_ctrl->rsvd[1] = 0; + wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0; + wq_ctrl->imm = 0; +} + +static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, + u32 rkey, struct dr_data_seg *data_seg, + u32 opcode, bool notify_hw) +{ + struct mlx5_wqe_ctrl_seg *wq_ctrl; + int opcode_mod = 0; + unsigned int size; + unsigned int idx; + + idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); + + wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); + dr_set_ctrl_seg(wq_ctrl, data_seg); + + switch (opcode) { + case MLX5_OPCODE_RDMA_READ: + case MLX5_OPCODE_RDMA_WRITE: + dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr, + rkey, data_seg, &size); + break; + case MLX5_OPCODE_FLOW_TBL_ACCESS: + opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT; + dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr, + data_seg, &size); + break; + default: + WARN(true, "illegal opcode %d", opcode); + return; + } + + /* -------------------------------------------------------- + * |opcode_mod (8 bit)|wqe_index (16 bits)| opcod (8 bits)| + * -------------------------------------------------------- + */ + wq_ctrl->opmod_idx_opcode = + cpu_to_be32((opcode_mod << 24) | + ((dr_qp->sq.pc & 0xffff) << 8) | + opcode); + wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8); + + dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++; + + if (notify_hw) + dr_cmd_notify_hw(dr_qp, wq_ctrl); +} + +static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) +{ + if (send_info->type == WRITE_ICM) { + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->write, MLX5_OPCODE_RDMA_WRITE, false); + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->read, MLX5_OPCODE_RDMA_READ, true); + } else { /* GTA_ARG */ + dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, + &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true); + } + +} + +/** + * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent + * with send_list parameters: + * + * @ste: The data that attached to this specific ste + * @size: of data to write + * @offset: of the data from start of the hw_ste entry + * @data: data + * @ste_info: ste to be sent with send_list + * @send_list: to append into it + * @copy_data: if true indicates that the data should be kept because + * it's not backuped any where (like in re-hash). + * if false, it lets the data to be updated after + * it was added to the list. + */ +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data) +{ + ste_info->size = size; + ste_info->ste = ste; + ste_info->offset = offset; + + if (copy_data) { + memcpy(ste_info->data_cont, data, size); + ste_info->data = ste_info->data_cont; + } else { + ste_info->data = data; + } + + list_add_tail(&ste_info->send_list, send_list); +} + +/* The function tries to consume one wc each time, unless the queue is full, in + * that case, which means that the hw is behind the sw in a full queue len + * the function will drain the cq till it empty. + */ +static int dr_handle_pending_wc(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + bool is_drain = false; + int ne; + + if (send_ring->pending_wqe < send_ring->signal_th) + return 0; + + /* Queue is full start drain it */ + if (send_ring->pending_wqe >= + dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN) + is_drain = true; + + do { + ne = dr_poll_cq(send_ring->cq, 1); + if (unlikely(ne < 0)) { + mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited", + send_ring->qp->qpn); + send_ring->err_state = true; + return ne; + } else if (ne == 1) { + send_ring->pending_wqe -= send_ring->signal_th; + } + } while (ne == 1 || + (is_drain && send_ring->pending_wqe >= send_ring->signal_th)); + + return 0; +} + +static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + send_ring->pending_wqe++; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->write.send_flags |= IB_SEND_SIGNALED; + else + send_info->write.send_flags = 0; +} + +static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + u32 buff_offset; + + if (send_info->write.length > dmn->info.max_inline_size) { + buff_offset = (send_ring->tx_head & + (dmn->send_ring->signal_th - 1)) * + send_ring->max_post_send_size; + /* Copy to ring mr */ + memcpy(send_ring->buf + buff_offset, + (void *)(uintptr_t)send_info->write.addr, + send_info->write.length); + send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; + send_info->write.lkey = send_ring->mr->mkey; + + send_ring->tx_head++; + } + + send_ring->pending_wqe++; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->write.send_flags |= IB_SEND_SIGNALED; + + send_ring->pending_wqe++; + send_info->read.length = send_info->write.length; + + /* Read into dedicated sync buffer */ + send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr; + send_info->read.lkey = send_ring->sync_mr->mkey; + + if (send_ring->pending_wqe % send_ring->signal_th == 0) + send_info->read.send_flags = IB_SEND_SIGNALED; + else + send_info->read.send_flags = 0; +} + +static void dr_fill_data_segs(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring, + struct postsend_info *send_info) +{ + if (send_info->type == WRITE_ICM) + dr_fill_write_icm_segs(dmn, send_ring, send_info); + else /* args */ + dr_fill_write_args_segs(send_ring, send_info); +} + +static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, + struct postsend_info *send_info) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + int ret; + + if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || + send_ring->err_state)) { + mlx5_core_dbg_once(dmn->mdev, + "Skipping post send: QP err state: %d, device state: %d\n", + send_ring->err_state, dmn->mdev->state); + return 0; + } + + spin_lock(&send_ring->lock); + + ret = dr_handle_pending_wc(dmn, send_ring); + if (ret) + goto out_unlock; + + dr_fill_data_segs(dmn, send_ring, send_info); + dr_post_send(send_ring->qp, send_info); + +out_unlock: + spin_unlock(&send_ring->lock); + return ret; +} + +static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 **data, + u32 *byte_size, + int *iterations, + int *num_stes) +{ + u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); + int alloc_size; + + if (chunk_byte_size > dmn->send_ring->max_post_send_size) { + *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size; + *byte_size = dmn->send_ring->max_post_send_size; + alloc_size = *byte_size; + *num_stes = *byte_size / DR_STE_SIZE; + } else { + *iterations = 1; + *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); + alloc_size = *num_stes * DR_STE_SIZE; + } + + *data = kvzalloc(alloc_size, GFP_KERNEL); + if (!*data) + return -ENOMEM; + + return 0; +} + +/** + * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm. + * + * @dmn: Domain + * @ste: The ste struct that contains the data (at + * least part of it) + * @data: The real data to send size data + * @size: for writing. + * @offset: The offset from the icm mapped data to + * start write to this for write only part of the + * buffer. + * + * Return: 0 on success. + */ +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, + u8 *data, u16 size, u16 offset) +{ + struct postsend_info send_info = {}; + + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size); + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = size; + send_info.write.lkey = 0; + send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk); + + return dr_postsend_icm_data(dmn, &send_info); +} + +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask) +{ + u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); + int num_stes_per_iter; + int iterations; + u8 *data; + int ret; + int i; + int j; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes_per_iter); + if (ret) + return ret; + + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE); + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u32 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + /* Copy all ste's on the data buffer + * need to add the bit_mask + */ + for (j = 0; j < num_stes_per_iter; j++) { + struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j]; + u32 ste_off = j * DR_STE_SIZE; + + if (mlx5dr_ste_is_not_used(ste)) { + memcpy(data + ste_off, + formatted_ste, DR_STE_SIZE); + } else { + /* Copy data */ + memcpy(data + ste_off, + htbl->chunk->hw_ste_arr + + DR_STE_SIZE_REDUCED * (ste_index + j), + DR_STE_SIZE_REDUCED); + /* Copy bit_mask */ + memcpy(data + ste_off + DR_STE_SIZE_REDUCED, + mask, DR_STE_SIZE_MASK); + /* Only when we have mask we need to re-arrange the STE */ + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, + data + (j * DR_STE_SIZE), + DR_STE_SIZE); + } + } + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kvfree(data); + return ret; +} + +/* Initialize htble with default STEs */ +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste) +{ + u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); + int iterations; + int num_stes; + u8 *copy_dst; + u8 *data; + int ret; + int i; + + ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, + &iterations, &num_stes); + if (ret) + return ret; + + if (update_hw_ste) { + /* Copy the reduced STE to hash table ste_arr */ + for (i = 0; i < num_stes; i++) { + copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); + } + } + + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE); + + /* Copy the same STE on the data buffer */ + for (i = 0; i < num_stes; i++) { + copy_dst = data + i * DR_STE_SIZE; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE); + } + + /* Send the data iteration times */ + for (i = 0; i < iterations; i++) { + u8 ste_index = i * (byte_size / DR_STE_SIZE); + struct postsend_info send_info = {}; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = byte_size; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out_free; + } + +out_free: + kvfree(data); + return ret; +} + +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action) +{ + struct postsend_info send_info = {}; + + send_info.write.addr = (uintptr_t)action->rewrite->data; + send_info.write.length = action->rewrite->num_of_actions * + DR_MODIFY_ACTION_SIZE; + send_info.write.lkey = 0; + send_info.remote_addr = + mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk); + + return dr_postsend_icm_data(dmn, &send_info); +} + +int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn, + struct mlx5dr_icm_chunk *chunk, + u16 num_of_actions, + u8 *data) +{ + struct postsend_info send_info = {}; + int ret; + + send_info.write.addr = (uintptr_t)data; + send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE; + send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk); + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + return ret; + + return 0; +} + +int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id, + u16 num_of_actions, u8 *actions_data) +{ + int data_len, iter = 0, cur_sent; + u64 addr; + int ret; + + addr = (uintptr_t)actions_data; + data_len = num_of_actions * DR_MODIFY_ACTION_SIZE; + + do { + struct postsend_info send_info = {}; + + send_info.type = GTA_ARG; + send_info.write.addr = addr; + cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE); + send_info.write.length = cur_sent; + send_info.write.lkey = 0; + send_info.remote_addr = arg_id + iter; + + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + goto out; + + iter++; + addr += cur_sent; + data_len -= cur_sent; + } while (data_len > 0); + +out: + return ret; +} + +static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + int port) +{ + u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port); + MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); + MLX5_SET(qpc, qpc, rre, 1); + MLX5_SET(qpc, qpc, rwe, 1); + + MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP); + MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn); + + return mlx5_cmd_exec_in(mdev, rst2init_qp, in); +} + +static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rts_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); + + MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn); + + MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); + MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); + MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */ + + MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP); + MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn); + + return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in); +} + +static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, + struct mlx5dr_qp *dr_qp, + struct dr_qp_rtr_attr *attr) +{ + u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; + void *qpc; + + qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); + + MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn); + + MLX5_SET(qpc, qpc, mtu, attr->mtu); + MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1); + MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac)); + memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid)); + MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, + attr->sgid_index); + + if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2) + MLX5_SET(qpc, qpc, primary_address_path.udp_sport, + attr->udp_src_port); + + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); + MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl); + MLX5_SET(qpc, qpc, min_rnr_nak, 1); + + MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); + MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn); + + return mlx5_cmd_exec_in(mdev, init2rtr_qp, in); +} + +static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps) +{ + /* Check whether RC RoCE QP creation with force loopback is allowed. + * There are two separate capability bits for this: + * - force loopback when RoCE is enabled + * - force loopback when RoCE is disabled + */ + return ((caps->roce_caps.roce_en && + caps->roce_caps.fl_rc_qp_when_roce_enabled) || + (!caps->roce_caps.roce_en && + caps->roce_caps.fl_rc_qp_when_roce_disabled)); +} + +static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; + struct dr_qp_rts_attr rts_attr = {}; + struct dr_qp_rtr_attr rtr_attr = {}; + enum ib_mtu mtu = IB_MTU_1024; + u16 gid_index = 0; + int port = 1; + int ret; + + /* Init */ + ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); + if (ret) { + mlx5dr_err(dmn, "Failed modify QP rst2init\n"); + return ret; + } + + /* RTR */ + rtr_attr.mtu = mtu; + rtr_attr.qp_num = dr_qp->qpn; + rtr_attr.min_rnr_timer = 12; + rtr_attr.port_num = port; + rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; + + /* If QP creation with force loopback is allowed, then there + * is no need for GID index when creating the QP. + * Otherwise we query GID attributes and use GID index. + */ + rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps); + if (!rtr_attr.fl) { + ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, + &rtr_attr.dgid_attr); + if (ret) + return ret; + + rtr_attr.sgid_index = gid_index; + } + + ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); + if (ret) { + mlx5dr_err(dmn, "Failed modify QP init2rtr\n"); + return ret; + } + + /* RTS */ + rts_attr.timeout = 14; + rts_attr.retry_cnt = 7; + rts_attr.rnr_retry = 7; + + ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); + if (ret) { + mlx5dr_err(dmn, "Failed modify QP rtr2rts\n"); + return ret; + } + + return 0; +} + +static void dr_cq_complete(struct mlx5_core_cq *mcq, + struct mlx5_eqe *eqe) +{ + pr_err("CQ completion CQ: #%u\n", mcq->cqn); +} + +static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, + struct mlx5_uars_page *uar, + size_t ncqe) +{ + u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {}; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_wq_param wqp; + struct mlx5_cqe64 *cqe; + struct mlx5dr_cq *cq; + int inlen, err, eqn; + void *cqc, *in; + __be64 *pas; + int vector; + u32 i; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return NULL; + + ncqe = roundup_pow_of_two(ncqe); + MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe)); + + wqp.buf_numa_node = mdev->priv.numa_node; + wqp.db_numa_node = mdev->priv.numa_node; + + err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + goto out; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; + } + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + goto err_cqwq; + + vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev); + err = mlx5_comp_eqn_get(mdev, vector, &eqn); + if (err) { + kvfree(in); + goto err_cqwq; + } + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); + + cq->mcq.comp = dr_cq_complete; + + err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); + kvfree(in); + + if (err) + goto err_cqwq; + + cq->mcq.cqe_sz = 64; + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; + cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; + *cq->mcq.set_ci_db = 0; + + /* set no-zero value, in order to avoid the HW to run db-recovery on + * CQ that used in polling mode. + */ + *cq->mcq.arm_db = cpu_to_be32(2 << 28); + + cq->mcq.vector = 0; + cq->mcq.uar = uar; + cq->mdev = mdev; + + return cq; + +err_cqwq: + mlx5_wq_destroy(&cq->wq_ctrl); +out: + kfree(cq); + return NULL; +} + +static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq) +{ + mlx5_core_destroy_cq(mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); + kfree(cq); +} + +static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey) +{ + u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, a, 1); + MLX5_SET(mkc, mkc, rw, 1); + MLX5_SET(mkc, mkc, rr, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in)); +} + +static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, + u32 pdn, void *buf, size_t size) +{ + struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL); + struct device *dma_device; + dma_addr_t dma_addr; + int err; + + if (!mr) + return NULL; + + dma_device = mlx5_core_dma_dev(mdev); + dma_addr = dma_map_single(dma_device, buf, size, + DMA_BIDIRECTIONAL); + err = dma_mapping_error(dma_device, dma_addr); + if (err) { + mlx5_core_warn(mdev, "Can't dma buf\n"); + kfree(mr); + return NULL; + } + + err = dr_create_mkey(mdev, pdn, &mr->mkey); + if (err) { + mlx5_core_warn(mdev, "Can't create mkey\n"); + dma_unmap_single(dma_device, dma_addr, size, + DMA_BIDIRECTIONAL); + kfree(mr); + return NULL; + } + + mr->dma_addr = dma_addr; + mr->size = size; + mr->addr = buf; + + return mr; +} + +static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) +{ + mlx5_core_destroy_mkey(mdev, mr->mkey); + dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size, + DMA_BIDIRECTIONAL); + kfree(mr); +} + +int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) +{ + struct dr_qp_init_attr init_attr = {}; + int cq_size; + int size; + int ret; + + dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL); + if (!dmn->send_ring) + return -ENOMEM; + + cq_size = QUEUE_SIZE + 1; + dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); + if (!dmn->send_ring->cq) { + mlx5dr_err(dmn, "Failed creating CQ\n"); + ret = -ENOMEM; + goto free_send_ring; + } + + init_attr.cqn = dmn->send_ring->cq->mcq.cqn; + init_attr.pdn = dmn->pdn; + init_attr.uar = dmn->uar; + init_attr.max_send_wr = QUEUE_SIZE; + + /* Isolated VL is applicable only if force loopback is supported */ + if (dr_send_allow_fl(&dmn->info.caps)) + init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc; + + spin_lock_init(&dmn->send_ring->lock); + + dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); + if (!dmn->send_ring->qp) { + mlx5dr_err(dmn, "Failed creating QP\n"); + ret = -ENOMEM; + goto clean_cq; + } + + dmn->send_ring->cq->qp = dmn->send_ring->qp; + + dmn->info.max_send_wr = QUEUE_SIZE; + dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, + DR_STE_SIZE); + + dmn->send_ring->signal_th = dmn->info.max_send_wr / + SIGNAL_PER_DIV_QUEUE; + + /* Prepare qp to be used */ + ret = dr_prepare_qp_to_rts(dmn); + if (ret) + goto clean_qp; + + dmn->send_ring->max_post_send_size = + mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K, + DR_ICM_TYPE_STE); + + /* Allocating the max size as a buffer for writing */ + size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size; + dmn->send_ring->buf = kzalloc(size, GFP_KERNEL); + if (!dmn->send_ring->buf) { + ret = -ENOMEM; + goto clean_qp; + } + + dmn->send_ring->buf_size = size; + + dmn->send_ring->mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->buf, size); + if (!dmn->send_ring->mr) { + ret = -ENOMEM; + goto free_mem; + } + + dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size, + GFP_KERNEL); + if (!dmn->send_ring->sync_buff) { + ret = -ENOMEM; + goto clean_mr; + } + + dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev, + dmn->pdn, dmn->send_ring->sync_buff, + dmn->send_ring->max_post_send_size); + if (!dmn->send_ring->sync_mr) { + ret = -ENOMEM; + goto free_sync_mem; + } + + return 0; + +free_sync_mem: + kfree(dmn->send_ring->sync_buff); +clean_mr: + dr_dereg_mr(dmn->mdev, dmn->send_ring->mr); +free_mem: + kfree(dmn->send_ring->buf); +clean_qp: + dr_destroy_qp(dmn->mdev, dmn->send_ring->qp); +clean_cq: + dr_destroy_cq(dmn->mdev, dmn->send_ring->cq); +free_send_ring: + kfree(dmn->send_ring); + + return ret; +} + +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring) +{ + dr_destroy_qp(dmn->mdev, send_ring->qp); + dr_destroy_cq(dmn->mdev, send_ring->cq); + dr_dereg_mr(dmn->mdev, send_ring->sync_mr); + dr_dereg_mr(dmn->mdev, send_ring->mr); + kfree(send_ring->buf); + kfree(send_ring->sync_buff); + kfree(send_ring); +} + +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) +{ + struct mlx5dr_send_ring *send_ring = dmn->send_ring; + struct postsend_info send_info = {}; + u8 data[DR_STE_SIZE]; + int num_of_sends_req; + int ret; + int i; + + /* Sending this amount of requests makes sure we will get drain */ + num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; + + /* Send fake requests forcing the last to be signaled */ + send_info.write.addr = (uintptr_t)data; + send_info.write.length = DR_STE_SIZE; + send_info.write.lkey = 0; + /* Using the sync_mr in order to write/read */ + send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; + send_info.rkey = send_ring->sync_mr->mkey; + + for (i = 0; i < num_of_sends_req; i++) { + ret = dr_postsend_icm_data(dmn, &send_info); + if (ret) + return ret; + } + + spin_lock(&send_ring->lock); + ret = dr_handle_pending_wc(dmn, send_ring); + spin_unlock(&send_ring->lock); + + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c new file mode 100644 index 0000000000..e94fbb015e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -0,0 +1,1463 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include +#include "dr_ste.h" + +struct dr_hw_ste_format { + u8 ctrl[DR_STE_SIZE_CTRL]; + u8 tag[DR_STE_SIZE_TAG]; + u8 mask[DR_STE_SIZE_MASK]; +}; + +static u32 dr_ste_crc32_calc(const void *input_data, size_t length) +{ + u32 crc = crc32(0, input_data, length); + + return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) | + ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000); +} + +bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps) +{ + return caps->sw_format_ver > MLX5_STEERING_FORMAT_CONNECTX_5; +} + +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) +{ + u32 num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + u8 masked[DR_STE_SIZE_TAG] = {}; + u32 crc32, index; + u16 bit; + int i; + + /* Don't calculate CRC if the result is predicted */ + if (num_entries == 1 || htbl->byte_mask == 0) + return 0; + + /* Mask tag using byte mask, bit per byte */ + bit = 1 << (DR_STE_SIZE_TAG - 1); + for (i = 0; i < DR_STE_SIZE_TAG; i++) { + if (htbl->byte_mask & bit) + masked[i] = hw_ste->tag[i]; + + bit = bit >> 1; + } + + crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG); + index = crc32 & (num_entries - 1); + + return index; +} + +u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask) +{ + u16 byte_mask = 0; + int i; + + for (i = 0; i < DR_STE_SIZE_MASK; i++) { + byte_mask = byte_mask << 1; + if (bit_mask[i] == 0xff) + byte_mask |= 1; + } + return byte_mask; +} + +static u8 *dr_ste_get_tag(u8 *hw_ste_p) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + return hw_ste->tag; +} + +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask) +{ + struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; + + memcpy(hw_ste->mask, bit_mask, DR_STE_SIZE_MASK); +} + +static void dr_ste_set_always_hit(struct dr_hw_ste_format *hw_ste) +{ + memset(&hw_ste->tag, 0, sizeof(hw_ste->tag)); + memset(&hw_ste->mask, 0, sizeof(hw_ste->mask)); +} + +static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste) +{ + hw_ste->tag[0] = 0xdc; + hw_ste->mask[0] = 0; +} + +bool mlx5dr_ste_is_miss_addr_set(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p) +{ + if (!ste_ctx->is_miss_addr_set) + return false; + + /* check if miss address is already set for this type of STE */ + return ste_ctx->is_miss_addr_set(hw_ste_p); +} + +void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p, u64 miss_addr) +{ + ste_ctx->set_miss_addr(hw_ste_p, miss_addr); +} + +static void dr_ste_always_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, u64 miss_addr) +{ + ste_ctx->set_next_lu_type(hw_ste, MLX5DR_STE_LU_TYPE_DONT_CARE); + ste_ctx->set_miss_addr(hw_ste, miss_addr); + dr_ste_set_always_miss((struct dr_hw_ste_format *)hw_ste); +} + +void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, u64 icm_addr, u32 ht_size) +{ + ste_ctx->set_hit_addr(hw_ste, icm_addr, ht_size); +} + +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste) +{ + u64 base_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(ste->htbl->chunk); + u32 index = ste - ste->htbl->chunk->ste_arr; + + return base_icm_addr + DR_STE_SIZE * index; +} + +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->chunk->ste_arr; + + return mlx5dr_icm_pool_get_chunk_mr_addr(ste->htbl->chunk) + DR_STE_SIZE * index; +} + +u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste) +{ + u64 index = ste - ste->htbl->chunk->ste_arr; + + return ste->htbl->chunk->hw_ste_arr + DR_STE_SIZE_REDUCED * index; +} + +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste) +{ + u32 index = ste - ste->htbl->chunk->ste_arr; + + return &ste->htbl->chunk->miss_list[index]; +} + +static void dr_ste_always_hit_htbl(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + + ste_ctx->set_byte_mask(hw_ste, next_htbl->byte_mask); + ste_ctx->set_next_lu_type(hw_ste, next_htbl->lu_type); + ste_ctx->set_hit_addr(hw_ste, mlx5dr_icm_pool_get_chunk_icm_addr(chunk), + mlx5dr_icm_pool_get_chunk_num_of_entries(chunk)); + + dr_ste_set_always_hit((struct dr_hw_ste_format *)hw_ste); +} + +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location) +{ + return ste_location == nic_matcher->num_of_builders; +} + +/* Replace relevant fields, except of: + * htbl - keep the origin htbl + * miss_list + list - already took the src from the list. + * icm_addr/mr_addr - depends on the hosting table. + * + * Before: + * | a | -> | b | -> | c | -> + * + * After: + * | a | -> | c | -> + * While the data that was in b copied to a. + */ +static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src) +{ + memcpy(mlx5dr_ste_get_hw_ste(dst), mlx5dr_ste_get_hw_ste(src), + DR_STE_SIZE_REDUCED); + dst->next_htbl = src->next_htbl; + if (dst->next_htbl) + dst->next_htbl->pointing_ste = dst; + + dst->refcount = src->refcount; +} + +/* Free ste which is the head and the only one in miss_list */ +static void +dr_ste_remove_head_ste(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste *ste, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + u8 tmp_data_ste[DR_STE_SIZE] = {}; + u64 miss_addr; + + miss_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + + /* Use temp ste because dr_ste_always_miss_addr + * touches bit_mask area which doesn't exist at ste->hw_ste. + * Need to use a full-sized (DR_STE_SIZE) hw_ste. + */ + memcpy(tmp_data_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED); + dr_ste_always_miss_addr(ste_ctx, tmp_data_ste, miss_addr); + memcpy(mlx5dr_ste_get_hw_ste(ste), tmp_data_ste, DR_STE_SIZE_REDUCED); + + list_del_init(&ste->miss_list_node); + + /* Write full STE size in order to have "always_miss" */ + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, + 0, tmp_data_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste which is the head but NOT the only one in miss_list: + * |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0 + */ +static void +dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + struct mlx5dr_ste *next_ste, + struct mlx5dr_ste_send_info *ste_info_head, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) + +{ + struct mlx5dr_ste_htbl *next_miss_htbl; + u8 hw_ste[DR_STE_SIZE] = {}; + int sb_idx; + + next_miss_htbl = next_ste->htbl; + + /* Remove from the miss_list the next_ste before copy */ + list_del_init(&next_ste->miss_list_node); + + /* Move data from next into ste */ + dr_ste_replace(ste, next_ste); + + /* Update the rule on STE change */ + mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false); + + /* Copy all 64 hw_ste bytes */ + memcpy(hw_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED); + sb_idx = ste->ste_chain_location - 1; + mlx5dr_ste_set_bit_mask(hw_ste, + nic_matcher->ste_builder[sb_idx].bit_mask); + + /* Del the htbl that contains the next_ste. + * The origin htbl stay with the same number of entries. + */ + mlx5dr_htbl_put(next_miss_htbl); + + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, + 0, hw_ste, + ste_info_head, + send_ste_list, + true /* Copy data */); + + stats_tbl->ctrl.num_of_collisions--; + stats_tbl->ctrl.num_of_valid_entries--; +} + +/* Free ste that is located in the middle of the miss list: + * |__| -->|_prev_ste_|->|_ste_|-->|_next_ste_| + */ +static void dr_ste_remove_middle_ste(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste *ste, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_ste_list, + struct mlx5dr_ste_htbl *stats_tbl) +{ + struct mlx5dr_ste *prev_ste; + u64 miss_addr; + + prev_ste = list_prev_entry(ste, miss_list_node); + if (WARN_ON(!prev_ste)) + return; + + miss_addr = ste_ctx->get_miss_addr(mlx5dr_ste_get_hw_ste(ste)); + ste_ctx->set_miss_addr(mlx5dr_ste_get_hw_ste(prev_ste), miss_addr); + + mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_CTRL, 0, + mlx5dr_ste_get_hw_ste(prev_ste), + ste_info, send_ste_list, + true /* Copy data*/); + + list_del_init(&ste->miss_list_node); + + stats_tbl->ctrl.num_of_valid_entries--; + stats_tbl->ctrl.num_of_collisions--; +} + +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + struct mlx5dr_ste_send_info *cur_ste_info, *tmp_ste_info; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_ste_send_info ste_info_head; + struct mlx5dr_ste *next_ste, *first_ste; + bool put_on_origin_table = true; + struct mlx5dr_ste_htbl *stats_tbl; + LIST_HEAD(send_ste_list); + + first_ste = list_first_entry(mlx5dr_ste_get_miss_list(ste), + struct mlx5dr_ste, miss_list_node); + stats_tbl = first_ste->htbl; + + /* Two options: + * 1. ste is head: + * a. head ste is the only ste in the miss list + * b. head ste is not the only ste in the miss-list + * 2. ste is not head + */ + if (first_ste == ste) { /* Ste is the head */ + struct mlx5dr_ste *last_ste; + + last_ste = list_last_entry(mlx5dr_ste_get_miss_list(ste), + struct mlx5dr_ste, miss_list_node); + if (last_ste == first_ste) + next_ste = NULL; + else + next_ste = list_next_entry(ste, miss_list_node); + + if (!next_ste) { + /* One and only entry in the list */ + dr_ste_remove_head_ste(ste_ctx, ste, + nic_matcher, + &ste_info_head, + &send_ste_list, + stats_tbl); + } else { + /* First but not only entry in the list */ + dr_ste_replace_head_ste(nic_matcher, ste, + next_ste, &ste_info_head, + &send_ste_list, stats_tbl); + put_on_origin_table = false; + } + } else { /* Ste in the middle of the list */ + dr_ste_remove_middle_ste(ste_ctx, ste, + &ste_info_head, &send_ste_list, + stats_tbl); + } + + /* Update HW */ + list_for_each_entry_safe(cur_ste_info, tmp_ste_info, + &send_ste_list, send_list) { + list_del(&cur_ste_info->send_list); + mlx5dr_send_postsend_ste(dmn, cur_ste_info->ste, + cur_ste_info->data, cur_ste_info->size, + cur_ste_info->offset); + } + + if (put_on_origin_table) + mlx5dr_htbl_put(ste->htbl); +} + +bool mlx5dr_ste_equal_tag(void *src, void *dst) +{ + struct dr_hw_ste_format *s_hw_ste = (struct dr_hw_ste_format *)src; + struct dr_hw_ste_format *d_hw_ste = (struct dr_hw_ste_format *)dst; + + return !memcmp(s_hw_ste->tag, d_hw_ste->tag, DR_STE_SIZE_TAG); +} + +void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl) +{ + u64 icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(next_htbl->chunk); + u32 num_entries = + mlx5dr_icm_pool_get_chunk_num_of_entries(next_htbl->chunk); + + ste_ctx->set_hit_addr(hw_ste, icm_addr, num_entries); +} + +void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p, u32 ste_size) +{ + if (ste_ctx->prepare_for_postsend) + ste_ctx->prepare_for_postsend(hw_ste_p, ste_size); +} + +/* Init one ste as a pattern for ste data array */ +void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx, + u16 gvmi, + enum mlx5dr_domain_nic_type nic_type, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info) +{ + bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX; + u8 tmp_hw_ste[DR_STE_SIZE] = {0}; + + ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi); + + /* Use temp ste because dr_ste_always_miss_addr/hit_htbl + * touches bit_mask area which doesn't exist at ste->hw_ste. + * Need to use a full-sized (DR_STE_SIZE) hw_ste. + */ + memcpy(tmp_hw_ste, formatted_ste, DR_STE_SIZE_REDUCED); + if (connect_info->type == CONNECT_HIT) + dr_ste_always_hit_htbl(ste_ctx, tmp_hw_ste, + connect_info->hit_next_htbl); + else + dr_ste_always_miss_addr(ste_ctx, tmp_hw_ste, + connect_info->miss_icm_addr); + memcpy(formatted_ste, tmp_hw_ste, DR_STE_SIZE_REDUCED); +} + +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste) +{ + u8 formatted_ste[DR_STE_SIZE] = {}; + + mlx5dr_ste_set_formatted_ste(dmn->ste_ctx, + dmn->info.caps.gvmi, + nic_dmn->type, + htbl, + formatted_ste, + connect_info); + + return mlx5dr_send_postsend_formatted_htbl(dmn, htbl, formatted_ste, update_hw_ste); +} + +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *next_htbl; + + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) { + u16 next_lu_type; + u16 byte_mask; + + next_lu_type = ste_ctx->get_next_lu_type(cur_hw_ste); + byte_mask = ste_ctx->get_byte_mask(cur_hw_ste); + + next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + log_table_size, + next_lu_type, + byte_mask); + if (!next_htbl) { + mlx5dr_dbg(dmn, "Failed allocating table\n"); + return -ENOMEM; + } + + /* Write new table to HW */ + info.type = CONNECT_MISS; + info.miss_icm_addr = + mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl, + &info, false)) { + mlx5dr_info(dmn, "Failed writing table to HW\n"); + goto free_table; + } + + mlx5dr_ste_set_hit_addr_by_next_htbl(ste_ctx, + cur_hw_ste, next_htbl); + ste->next_htbl = next_htbl; + next_htbl->pointing_ste = ste; + } + + return 0; + +free_table: + mlx5dr_ste_htbl_free(next_htbl); + return -ENOENT; +} + +struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u16 lu_type, u16 byte_mask) +{ + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste_htbl *htbl; + u32 num_entries; + int i; + + htbl = mlx5dr_icm_pool_alloc_htbl(pool); + if (!htbl) + return NULL; + + chunk = mlx5dr_icm_alloc_chunk(pool, chunk_size); + if (!chunk) + goto out_free_htbl; + + htbl->chunk = chunk; + htbl->lu_type = lu_type; + htbl->byte_mask = byte_mask; + htbl->refcount = 0; + htbl->pointing_ste = NULL; + htbl->ctrl.num_of_valid_entries = 0; + htbl->ctrl.num_of_collisions = 0; + num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); + + for (i = 0; i < num_entries; i++) { + struct mlx5dr_ste *ste = &chunk->ste_arr[i]; + + ste->htbl = htbl; + ste->refcount = 0; + INIT_LIST_HEAD(&ste->miss_list_node); + INIT_LIST_HEAD(&chunk->miss_list[i]); + } + + return htbl; + +out_free_htbl: + mlx5dr_icm_pool_free_htbl(pool, htbl); + return NULL; +} + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl) +{ + struct mlx5dr_icm_pool *pool = htbl->chunk->buddy_mem->pool; + + if (htbl->refcount) + return -EBUSY; + + mlx5dr_icm_free_chunk(htbl->chunk); + mlx5dr_icm_pool_free_htbl(pool, htbl); + + return 0; +} + +void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *hw_ste_arr, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps, + hw_ste_arr, attr, added_stes); +} + +void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *hw_ste_arr, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps, + hw_ste_arr, attr, added_stes); +} + +const struct mlx5dr_ste_action_modify_field * +mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field) +{ + const struct mlx5dr_ste_action_modify_field *hw_field; + + if (sw_field >= ste_ctx->modify_field_arr_sz) + return NULL; + + hw_field = &ste_ctx->modify_field_arr[sw_field]; + if (!hw_field->end && !hw_field->start) + return NULL; + + return hw_field; +} + +void mlx5dr_ste_set_action_set(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + ste_ctx->set_action_set((u8 *)hw_action, + hw_field, shifter, length, data); +} + +void mlx5dr_ste_set_action_add(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + ste_ctx->set_action_add((u8 *)hw_action, + hw_field, shifter, length, data); +} + +void mlx5dr_ste_set_action_copy(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) +{ + ste_ctx->set_action_copy((u8 *)hw_action, + dst_hw_field, dst_shifter, dst_len, + src_hw_field, src_shifter); +} + +int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, + void *data, u32 data_sz, + u8 *hw_action, u32 hw_action_sz, + u16 *used_hw_action_num) +{ + /* Only Ethernet frame is supported, with VLAN (18) or without (14) */ + if (data_sz != HDR_LEN_L2 && data_sz != HDR_LEN_L2_W_VLAN) + return -EINVAL; + + return ste_ctx->set_action_decap_l3_list(data, data_sz, + hw_action, hw_action_sz, + used_hw_action_num); +} + +static int +dr_ste_alloc_modify_hdr_chunk(struct mlx5dr_action *action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + u32 chunk_size; + int ret; + + chunk_size = ilog2(roundup_pow_of_two(action->rewrite->num_of_actions)); + + /* HW modify action index granularity is at least 64B */ + chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8); + + action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, + chunk_size); + if (!action->rewrite->chunk) + return -ENOMEM; + + action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(action->rewrite->chunk) - + dmn->info.caps.hdr_modify_icm_addr) / + DR_ACTION_CACHE_LINE_SIZE; + + ret = mlx5dr_send_postsend_action(action->rewrite->dmn, action); + if (ret) + goto free_chunk; + + return 0; + +free_chunk: + mlx5dr_icm_free_chunk(action->rewrite->chunk); + return -ENOMEM; +} + +static void dr_ste_free_modify_hdr_chunk(struct mlx5dr_action *action) +{ + mlx5dr_icm_free_chunk(action->rewrite->chunk); +} + +int mlx5dr_ste_alloc_modify_hdr(struct mlx5dr_action *action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + + if (mlx5dr_domain_is_support_ptrn_arg(dmn)) + return dmn->ste_ctx->alloc_modify_hdr_chunk(action); + + return dr_ste_alloc_modify_hdr_chunk(action); +} + +void mlx5dr_ste_free_modify_hdr(struct mlx5dr_action *action) +{ + struct mlx5dr_domain *dmn = action->rewrite->dmn; + + if (mlx5dr_domain_is_support_ptrn_arg(dmn)) + return dmn->ste_ctx->dealloc_modify_hdr_chunk(action); + + return dr_ste_free_modify_hdr_chunk(action); +} + +static int dr_ste_build_pre_check_spec(struct mlx5dr_domain *dmn, + struct mlx5dr_match_spec *spec) +{ + if (spec->ip_version) { + if (spec->ip_version != 0xf) { + mlx5dr_err(dmn, + "Partial ip_version mask with src/dst IP is not supported\n"); + return -EINVAL; + } + } else if (spec->ethertype != 0xffff && + (DR_MASK_IS_SRC_IP_SET(spec) || DR_MASK_IS_DST_IP_SET(spec))) { + mlx5dr_err(dmn, + "Partial/no ethertype mask with src/dst IP is not supported\n"); + return -EINVAL; + } + + return 0; +} + +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value) +{ + if (value) + return 0; + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + if (mask->misc.source_port && mask->misc.source_port != 0xffff) { + mlx5dr_err(dmn, + "Partial mask source_port is not supported\n"); + return -EINVAL; + } + if (mask->misc.source_eswitch_owner_vhca_id && + mask->misc.source_eswitch_owner_vhca_id != 0xffff) { + mlx5dr_err(dmn, + "Partial mask source_eswitch_owner_vhca_id is not supported\n"); + return -EINVAL; + } + } + + if ((match_criteria & DR_MATCHER_CRITERIA_OUTER) && + dr_ste_build_pre_check_spec(dmn, &mask->outer)) + return -EINVAL; + + if ((match_criteria & DR_MATCHER_CRITERIA_INNER) && + dr_ste_build_pre_check_spec(dmn, &mask->inner)) + return -EINVAL; + + return 0; +} + +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn; + bool is_rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; + struct mlx5dr_ste_build *sb; + int ret, i; + + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, + &matcher->mask, value); + if (ret) + return ret; + + sb = nic_matcher->ste_builder; + for (i = 0; i < nic_matcher->num_of_builders; i++) { + ste_ctx->ste_init(ste_arr, + sb->lu_type, + is_rx, + dmn->info.caps.gvmi); + + mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask); + + ret = sb->ste_build_tag_func(value, sb, dr_ste_get_tag(ste_arr)); + if (ret) + return ret; + + /* Connect the STEs */ + if (i < (nic_matcher->num_of_builders - 1)) { + /* Need the next builder for these fields, + * not relevant for the last ste in the chain. + */ + sb++; + ste_ctx->set_next_lu_type(ste_arr, sb->lu_type); + ste_ctx->set_byte_mask(ste_arr, sb->byte_mask); + } + ste_arr += DR_STE_SIZE; + } + return 0; +} + +#define IFC_GET_CLR(typ, p, fld, clear) ({ \ + void *__p = (p); \ + u32 __t = MLX5_GET(typ, __p, fld); \ + if (clear) \ + MLX5_SET(typ, __p, fld, 0); \ + __t; \ +}) + +#define memcpy_and_clear(to, from, len, clear) ({ \ + void *__to = (to), *__from = (from); \ + size_t __len = (len); \ + memcpy(__to, __from, __len); \ + if (clear) \ + memset(__from, 0, __len); \ +}) + +static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec, bool clr) +{ + spec->gre_c_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_c_present, clr); + spec->gre_k_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_k_present, clr); + spec->gre_s_present = IFC_GET_CLR(fte_match_set_misc, mask, gre_s_present, clr); + spec->source_vhca_port = IFC_GET_CLR(fte_match_set_misc, mask, source_vhca_port, clr); + spec->source_sqn = IFC_GET_CLR(fte_match_set_misc, mask, source_sqn, clr); + + spec->source_port = IFC_GET_CLR(fte_match_set_misc, mask, source_port, clr); + spec->source_eswitch_owner_vhca_id = + IFC_GET_CLR(fte_match_set_misc, mask, source_eswitch_owner_vhca_id, clr); + + spec->outer_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_prio, clr); + spec->outer_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cfi, clr); + spec->outer_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, outer_second_vid, clr); + spec->inner_second_prio = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_prio, clr); + spec->inner_second_cfi = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cfi, clr); + spec->inner_second_vid = IFC_GET_CLR(fte_match_set_misc, mask, inner_second_vid, clr); + + spec->outer_second_cvlan_tag = + IFC_GET_CLR(fte_match_set_misc, mask, outer_second_cvlan_tag, clr); + spec->inner_second_cvlan_tag = + IFC_GET_CLR(fte_match_set_misc, mask, inner_second_cvlan_tag, clr); + spec->outer_second_svlan_tag = + IFC_GET_CLR(fte_match_set_misc, mask, outer_second_svlan_tag, clr); + spec->inner_second_svlan_tag = + IFC_GET_CLR(fte_match_set_misc, mask, inner_second_svlan_tag, clr); + spec->gre_protocol = IFC_GET_CLR(fte_match_set_misc, mask, gre_protocol, clr); + + spec->gre_key_h = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.hi, clr); + spec->gre_key_l = IFC_GET_CLR(fte_match_set_misc, mask, gre_key.nvgre.lo, clr); + + spec->vxlan_vni = IFC_GET_CLR(fte_match_set_misc, mask, vxlan_vni, clr); + + spec->geneve_vni = IFC_GET_CLR(fte_match_set_misc, mask, geneve_vni, clr); + spec->geneve_tlv_option_0_exist = + IFC_GET_CLR(fte_match_set_misc, mask, geneve_tlv_option_0_exist, clr); + spec->geneve_oam = IFC_GET_CLR(fte_match_set_misc, mask, geneve_oam, clr); + + spec->outer_ipv6_flow_label = + IFC_GET_CLR(fte_match_set_misc, mask, outer_ipv6_flow_label, clr); + + spec->inner_ipv6_flow_label = + IFC_GET_CLR(fte_match_set_misc, mask, inner_ipv6_flow_label, clr); + + spec->geneve_opt_len = IFC_GET_CLR(fte_match_set_misc, mask, geneve_opt_len, clr); + spec->geneve_protocol_type = + IFC_GET_CLR(fte_match_set_misc, mask, geneve_protocol_type, clr); + + spec->bth_dst_qp = IFC_GET_CLR(fte_match_set_misc, mask, bth_dst_qp, clr); +} + +static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec, bool clr) +{ + __be32 raw_ip[4]; + + spec->smac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_47_16, clr); + + spec->smac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, smac_15_0, clr); + spec->ethertype = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ethertype, clr); + + spec->dmac_47_16 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_47_16, clr); + + spec->dmac_15_0 = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, dmac_15_0, clr); + spec->first_prio = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_prio, clr); + spec->first_cfi = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_cfi, clr); + spec->first_vid = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, first_vid, clr); + + spec->ip_protocol = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_protocol, clr); + spec->ip_dscp = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_dscp, clr); + spec->ip_ecn = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_ecn, clr); + spec->cvlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, cvlan_tag, clr); + spec->svlan_tag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, svlan_tag, clr); + spec->frag = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, frag, clr); + spec->ip_version = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ip_version, clr); + spec->tcp_flags = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_flags, clr); + spec->tcp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_sport, clr); + spec->tcp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, tcp_dport, clr); + + spec->ipv4_ihl = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ipv4_ihl, clr); + spec->ttl_hoplimit = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, ttl_hoplimit, clr); + + spec->udp_sport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_sport, clr); + spec->udp_dport = IFC_GET_CLR(fte_match_set_lyr_2_4, mask, udp_dport, clr); + + memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip), clr); + + spec->src_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->src_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->src_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->src_ip_31_0 = be32_to_cpu(raw_ip[3]); + + memcpy_and_clear(raw_ip, MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(raw_ip), clr); + + spec->dst_ip_127_96 = be32_to_cpu(raw_ip[0]); + spec->dst_ip_95_64 = be32_to_cpu(raw_ip[1]); + spec->dst_ip_63_32 = be32_to_cpu(raw_ip[2]); + spec->dst_ip_31_0 = be32_to_cpu(raw_ip[3]); +} + +static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec, bool clr) +{ + spec->outer_first_mpls_label = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_label, clr); + spec->outer_first_mpls_exp = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_exp, clr); + spec->outer_first_mpls_s_bos = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_s_bos, clr); + spec->outer_first_mpls_ttl = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls.mpls_ttl, clr); + spec->inner_first_mpls_label = + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_label, clr); + spec->inner_first_mpls_exp = + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_exp, clr); + spec->inner_first_mpls_s_bos = + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_s_bos, clr); + spec->inner_first_mpls_ttl = + IFC_GET_CLR(fte_match_set_misc2, mask, inner_first_mpls.mpls_ttl, clr); + spec->outer_first_mpls_over_gre_label = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_label, clr); + spec->outer_first_mpls_over_gre_exp = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_exp, clr); + spec->outer_first_mpls_over_gre_s_bos = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_s_bos, clr); + spec->outer_first_mpls_over_gre_ttl = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_gre.mpls_ttl, clr); + spec->outer_first_mpls_over_udp_label = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_label, clr); + spec->outer_first_mpls_over_udp_exp = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_exp, clr); + spec->outer_first_mpls_over_udp_s_bos = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_s_bos, clr); + spec->outer_first_mpls_over_udp_ttl = + IFC_GET_CLR(fte_match_set_misc2, mask, outer_first_mpls_over_udp.mpls_ttl, clr); + spec->metadata_reg_c_7 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_7, clr); + spec->metadata_reg_c_6 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_6, clr); + spec->metadata_reg_c_5 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_5, clr); + spec->metadata_reg_c_4 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_4, clr); + spec->metadata_reg_c_3 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_3, clr); + spec->metadata_reg_c_2 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_2, clr); + spec->metadata_reg_c_1 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_1, clr); + spec->metadata_reg_c_0 = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_c_0, clr); + spec->metadata_reg_a = IFC_GET_CLR(fte_match_set_misc2, mask, metadata_reg_a, clr); +} + +static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec, bool clr) +{ + spec->inner_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_seq_num, clr); + spec->outer_tcp_seq_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_seq_num, clr); + spec->inner_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, inner_tcp_ack_num, clr); + spec->outer_tcp_ack_num = IFC_GET_CLR(fte_match_set_misc3, mask, outer_tcp_ack_num, clr); + spec->outer_vxlan_gpe_vni = + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_vni, clr); + spec->outer_vxlan_gpe_next_protocol = + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_next_protocol, clr); + spec->outer_vxlan_gpe_flags = + IFC_GET_CLR(fte_match_set_misc3, mask, outer_vxlan_gpe_flags, clr); + spec->icmpv4_header_data = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_header_data, clr); + spec->icmpv6_header_data = + IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_header_data, clr); + spec->icmpv4_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_type, clr); + spec->icmpv4_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmp_code, clr); + spec->icmpv6_type = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_type, clr); + spec->icmpv6_code = IFC_GET_CLR(fte_match_set_misc3, mask, icmpv6_code, clr); + spec->geneve_tlv_option_0_data = + IFC_GET_CLR(fte_match_set_misc3, mask, geneve_tlv_option_0_data, clr); + spec->gtpu_teid = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_teid, clr); + spec->gtpu_msg_flags = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_flags, clr); + spec->gtpu_msg_type = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_msg_type, clr); + spec->gtpu_dw_0 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_0, clr); + spec->gtpu_dw_2 = IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_dw_2, clr); + spec->gtpu_first_ext_dw_0 = + IFC_GET_CLR(fte_match_set_misc3, mask, gtpu_first_ext_dw_0, clr); +} + +static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec, bool clr) +{ + spec->prog_sample_field_id_0 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_0, clr); + spec->prog_sample_field_value_0 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_0, clr); + spec->prog_sample_field_id_1 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_1, clr); + spec->prog_sample_field_value_1 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_1, clr); + spec->prog_sample_field_id_2 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_2, clr); + spec->prog_sample_field_value_2 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_2, clr); + spec->prog_sample_field_id_3 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_id_3, clr); + spec->prog_sample_field_value_3 = + IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_3, clr); +} + +static void dr_ste_copy_mask_misc5(char *mask, struct mlx5dr_match_misc5 *spec, bool clr) +{ + spec->macsec_tag_0 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_0, clr); + spec->macsec_tag_1 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_1, clr); + spec->macsec_tag_2 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_2, clr); + spec->macsec_tag_3 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_3, clr); + spec->tunnel_header_0 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_0, clr); + spec->tunnel_header_1 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_1, clr); + spec->tunnel_header_2 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_2, clr); + spec->tunnel_header_3 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_3, clr); +} + +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask, + bool clr) +{ + u8 tail_param[MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)] = {}; + u8 *data = (u8 *)mask->match_buf; + size_t param_location; + void *buff; + + if (match_criteria & DR_MATCHER_CRITERIA_OUTER) { + if (mask->match_sz < sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data, mask->match_sz); + buff = tail_param; + } else { + buff = mask->match_buf; + } + dr_ste_copy_mask_spec(buff, &set_param->outer, clr); + } + param_location = sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc(buff, &set_param->misc, clr); + } + param_location += sizeof(struct mlx5dr_match_misc); + + if (match_criteria & DR_MATCHER_CRITERIA_INNER) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_spec)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_spec(buff, &set_param->inner, clr); + } + param_location += sizeof(struct mlx5dr_match_spec); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC2) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc2)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc2(buff, &set_param->misc2, clr); + } + + param_location += sizeof(struct mlx5dr_match_misc2); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC3) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc3)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc3(buff, &set_param->misc3, clr); + } + + param_location += sizeof(struct mlx5dr_match_misc3); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC4) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc4)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc4(buff, &set_param->misc4, clr); + } + + param_location += sizeof(struct mlx5dr_match_misc4); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC5) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc5)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc5(buff, &set_param->misc5, clr); + } +} + +void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l2_src_dst_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l3_ipv6_dst_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l3_ipv6_src_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l3_ipv4_5_tuple_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l2_src_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l2_dst_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l2_tnl_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l3_ipv4_misc_init(sb, mask); +} + +void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_ipv6_l3_l4_init(sb, mask); +} + +static int dr_ste_build_empty_always_hit_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + return 0; +} + +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx) +{ + sb->rx = rx; + sb->lu_type = MLX5DR_STE_LU_TYPE_DONT_CARE; + sb->byte_mask = 0; + sb->ste_build_tag_func = &dr_ste_build_empty_always_hit_tag; +} + +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_mpls_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_gre_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + return ste_ctx->build_tnl_mpls_over_gre_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + return ste_ctx->build_tnl_mpls_over_udp_init(sb, mask); +} + +void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + ste_ctx->build_icmp_init(sb, mask); +} + +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_general_purpose_init(sb, mask); +} + +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_eth_l4_misc_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_vxlan_gpe_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_geneve_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + if (!ste_ctx->build_tnl_geneve_tlv_opt_exist_init) + return; + + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_geneve_tlv_opt_exist_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_flex_parser_0_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_flex_parser_1_init(sb, mask); +} + +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_register_0_init(sb, mask); +} + +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_register_1_init(sb, mask); +} + +void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx) +{ + /* Set vhca_id_valid before we reset source_eswitch_owner_vhca_id */ + sb->vhca_id_valid = mask->misc.source_eswitch_owner_vhca_id; + + sb->rx = rx; + sb->dmn = dmn; + sb->inner = inner; + ste_ctx->build_src_gvmi_qpn_init(sb, mask); +} + +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_flex_parser_0_init(sb, mask); +} + +void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_flex_parser_1_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_header_0_1_init(sb, mask); +} + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version) +{ + if (version == MLX5_STEERING_FORMAT_CONNECTX_5) + return mlx5dr_ste_get_ctx_v0(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_6DX) + return mlx5dr_ste_get_ctx_v1(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_7) + return mlx5dr_ste_get_ctx_v2(); + + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h new file mode 100644 index 0000000000..54a6619c3e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */ + +#ifndef _DR_STE_ +#define _DR_STE_ + +#include "dr_types.h" + +#define STE_IPV4 0x1 +#define STE_IPV6 0x2 +#define STE_TCP 0x1 +#define STE_UDP 0x2 +#define STE_SPI 0x3 +#define IP_VERSION_IPV4 0x4 +#define IP_VERSION_IPV6 0x6 +#define STE_SVLAN 0x1 +#define STE_CVLAN 0x2 +#define HDR_LEN_L2_MACS 0xC +#define HDR_LEN_L2_VLAN 0x4 +#define HDR_LEN_L2_ETHER 0x2 +#define HDR_LEN_L2 (HDR_LEN_L2_MACS + HDR_LEN_L2_ETHER) +#define HDR_LEN_L2_W_VLAN (HDR_LEN_L2 + HDR_LEN_L2_VLAN) + +/* Set to STE a specific value using DR_STE_SET */ +#define DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, value) do { \ + if ((spec)->s_fname) { \ + MLX5_SET(ste_##lookup_type, tag, t_fname, value); \ + (spec)->s_fname = 0; \ + } \ +} while (0) + +/* Set to STE spec->s_fname to tag->t_fname set spec->s_fname as used */ +#define DR_STE_SET_TAG(lookup_type, tag, t_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, spec->s_fname) + +/* Set to STE -1 to tag->t_fname and set spec->s_fname as used */ +#define DR_STE_SET_ONES(lookup_type, tag, t_fname, spec, s_fname) \ + DR_STE_SET_VAL(lookup_type, tag, t_fname, spec, s_fname, -1) + +#define DR_STE_SET_TCP_FLAGS(lookup_type, tag, spec) do { \ + MLX5_SET(ste_##lookup_type, tag, tcp_ns, !!((spec)->tcp_flags & (1 << 8))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_cwr, !!((spec)->tcp_flags & (1 << 7))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ece, !!((spec)->tcp_flags & (1 << 6))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_urg, !!((spec)->tcp_flags & (1 << 5))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_ack, !!((spec)->tcp_flags & (1 << 4))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_psh, !!((spec)->tcp_flags & (1 << 3))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_rst, !!((spec)->tcp_flags & (1 << 2))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_syn, !!((spec)->tcp_flags & (1 << 1))); \ + MLX5_SET(ste_##lookup_type, tag, tcp_fin, !!((spec)->tcp_flags & (1 << 0))); \ +} while (0) + +#define DR_STE_SET_MPLS(lookup_type, mask, in_out, tag) do { \ + struct mlx5dr_match_misc2 *_mask = mask; \ + u8 *_tag = tag; \ + DR_STE_SET_TAG(lookup_type, _tag, mpls0_label, _mask, \ + in_out##_first_mpls_label);\ + DR_STE_SET_TAG(lookup_type, _tag, mpls0_s_bos, _mask, \ + in_out##_first_mpls_s_bos); \ + DR_STE_SET_TAG(lookup_type, _tag, mpls0_exp, _mask, \ + in_out##_first_mpls_exp); \ + DR_STE_SET_TAG(lookup_type, _tag, mpls0_ttl, _mask, \ + in_out##_first_mpls_ttl); \ +} while (0) + +#define DR_STE_SET_FLEX_PARSER_FIELD(tag, fname, caps, spec) do { \ + u8 parser_id = (caps)->flex_parser_id_##fname; \ + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); \ + *(__be32 *)parser_ptr = cpu_to_be32((spec)->fname);\ + (spec)->fname = 0;\ +} while (0) + +#define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) + +#define DR_STE_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + (_misc)->outer_first_mpls_over_udp_label || \ + (_misc)->outer_first_mpls_over_udp_exp || \ + (_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) + +enum dr_ste_action_modify_type_l3 { + DR_STE_ACTION_MDFY_TYPE_L3_NONE = 0x0, + DR_STE_ACTION_MDFY_TYPE_L3_IPV4 = 0x1, + DR_STE_ACTION_MDFY_TYPE_L3_IPV6 = 0x2, +}; + +enum dr_ste_action_modify_type_l4 { + DR_STE_ACTION_MDFY_TYPE_L4_NONE = 0x0, + DR_STE_ACTION_MDFY_TYPE_L4_TCP = 0x1, + DR_STE_ACTION_MDFY_TYPE_L4_UDP = 0x2, +}; + +enum { + HDR_MPLS_OFFSET_LABEL = 12, + HDR_MPLS_OFFSET_EXP = 9, + HDR_MPLS_OFFSET_S_BOS = 8, + HDR_MPLS_OFFSET_TTL = 0, +}; + +u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask); + +static inline u8 * +dr_ste_calc_flex_parser_offset(u8 *tag, u8 parser_id) +{ + /* Calculate tag byte offset based on flex parser id */ + return tag + 4 * (3 - (parser_id % 4)); +} + +#define DR_STE_CTX_BUILDER(fname) \ + ((*build_##fname##_init)(struct mlx5dr_ste_build *sb, \ + struct mlx5dr_match_param *mask)) + +struct mlx5dr_ste_ctx { + /* Builders */ + void DR_STE_CTX_BUILDER(eth_l2_src_dst); + void DR_STE_CTX_BUILDER(eth_l3_ipv6_src); + void DR_STE_CTX_BUILDER(eth_l3_ipv6_dst); + void DR_STE_CTX_BUILDER(eth_l3_ipv4_5_tuple); + void DR_STE_CTX_BUILDER(eth_l2_src); + void DR_STE_CTX_BUILDER(eth_l2_dst); + void DR_STE_CTX_BUILDER(eth_l2_tnl); + void DR_STE_CTX_BUILDER(eth_l3_ipv4_misc); + void DR_STE_CTX_BUILDER(eth_ipv6_l3_l4); + void DR_STE_CTX_BUILDER(mpls); + void DR_STE_CTX_BUILDER(tnl_gre); + void DR_STE_CTX_BUILDER(tnl_mpls); + void DR_STE_CTX_BUILDER(tnl_mpls_over_gre); + void DR_STE_CTX_BUILDER(tnl_mpls_over_udp); + void DR_STE_CTX_BUILDER(icmp); + void DR_STE_CTX_BUILDER(general_purpose); + void DR_STE_CTX_BUILDER(eth_l4_misc); + void DR_STE_CTX_BUILDER(tnl_vxlan_gpe); + void DR_STE_CTX_BUILDER(tnl_geneve); + void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt); + void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt_exist); + void DR_STE_CTX_BUILDER(register_0); + void DR_STE_CTX_BUILDER(register_1); + void DR_STE_CTX_BUILDER(src_gvmi_qpn); + void DR_STE_CTX_BUILDER(flex_parser_0); + void DR_STE_CTX_BUILDER(flex_parser_1); + void DR_STE_CTX_BUILDER(tnl_gtpu); + void DR_STE_CTX_BUILDER(tnl_header_0_1); + void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0); + void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1); + + /* Getters and Setters */ + void (*ste_init)(u8 *hw_ste_p, u16 lu_type, + bool is_rx, u16 gvmi); + void (*set_next_lu_type)(u8 *hw_ste_p, u16 lu_type); + u16 (*get_next_lu_type)(u8 *hw_ste_p); + bool (*is_miss_addr_set)(u8 *hw_ste_p); + void (*set_miss_addr)(u8 *hw_ste_p, u64 miss_addr); + u64 (*get_miss_addr)(u8 *hw_ste_p); + void (*set_hit_addr)(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); + void (*set_byte_mask)(u8 *hw_ste_p, u16 byte_mask); + u16 (*get_byte_mask)(u8 *hw_ste_p); + + /* Actions */ + u32 actions_caps; + void (*set_actions_rx)(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *hw_ste_arr, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes); + void (*set_actions_tx)(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *hw_ste_arr, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes); + u32 modify_field_arr_sz; + const struct mlx5dr_ste_action_modify_field *modify_field_arr; + void (*set_action_set)(u8 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data); + void (*set_action_add)(u8 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data); + void (*set_action_copy)(u8 *hw_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter); + int (*set_action_decap_l3_list)(void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num); + int (*alloc_modify_hdr_chunk)(struct mlx5dr_action *action); + void (*dealloc_modify_hdr_chunk)(struct mlx5dr_action *action); + + /* Send */ + void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void); + +#endif /* _DR_STE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c new file mode 100644 index 0000000000..f708b02942 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -0,0 +1,1962 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */ + +#include +#include +#include "dr_ste.h" + +#define SVLAN_ETHERTYPE 0x88a8 +#define DR_STE_ENABLE_FLOW_TAG BIT(31) + +enum dr_ste_v0_entry_type { + DR_STE_TYPE_TX = 1, + DR_STE_TYPE_RX = 2, + DR_STE_TYPE_MODIFY_PKT = 6, +}; + +enum dr_ste_v0_action_tunl { + DR_STE_TUNL_ACTION_NONE = 0, + DR_STE_TUNL_ACTION_ENABLE = 1, + DR_STE_TUNL_ACTION_DECAP = 2, + DR_STE_TUNL_ACTION_L3_DECAP = 3, + DR_STE_TUNL_ACTION_POP_VLAN = 4, +}; + +enum dr_ste_v0_action_type { + DR_STE_ACTION_TYPE_PUSH_VLAN = 1, + DR_STE_ACTION_TYPE_ENCAP_L3 = 3, + DR_STE_ACTION_TYPE_ENCAP = 4, +}; + +enum dr_ste_v0_action_mdfy_op { + DR_STE_ACTION_MDFY_OP_COPY = 0x1, + DR_STE_ACTION_MDFY_OP_SET = 0x2, + DR_STE_ACTION_MDFY_OP_ADD = 0x3, +}; + +#define DR_STE_CALC_LU_TYPE(lookup_type, rx, inner) \ + ((inner) ? DR_STE_V0_LU_TYPE_##lookup_type##_I : \ + (rx) ? DR_STE_V0_LU_TYPE_##lookup_type##_D : \ + DR_STE_V0_LU_TYPE_##lookup_type##_O) + +enum { + DR_STE_V0_LU_TYPE_NOP = 0x00, + DR_STE_V0_LU_TYPE_SRC_GVMI_AND_QP = 0x05, + DR_STE_V0_LU_TYPE_ETHL2_TUNNELING_I = 0x0a, + DR_STE_V0_LU_TYPE_ETHL2_DST_O = 0x06, + DR_STE_V0_LU_TYPE_ETHL2_DST_I = 0x07, + DR_STE_V0_LU_TYPE_ETHL2_DST_D = 0x1b, + DR_STE_V0_LU_TYPE_ETHL2_SRC_O = 0x08, + DR_STE_V0_LU_TYPE_ETHL2_SRC_I = 0x09, + DR_STE_V0_LU_TYPE_ETHL2_SRC_D = 0x1c, + DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_O = 0x36, + DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_I = 0x37, + DR_STE_V0_LU_TYPE_ETHL2_SRC_DST_D = 0x38, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_O = 0x0d, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_I = 0x0e, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_DST_D = 0x1e, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_O = 0x0f, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_I = 0x10, + DR_STE_V0_LU_TYPE_ETHL3_IPV6_SRC_D = 0x1f, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x11, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x12, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_5_TUPLE_D = 0x20, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_O = 0x29, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_I = 0x2a, + DR_STE_V0_LU_TYPE_ETHL3_IPV4_MISC_D = 0x2b, + DR_STE_V0_LU_TYPE_ETHL4_O = 0x13, + DR_STE_V0_LU_TYPE_ETHL4_I = 0x14, + DR_STE_V0_LU_TYPE_ETHL4_D = 0x21, + DR_STE_V0_LU_TYPE_ETHL4_MISC_O = 0x2c, + DR_STE_V0_LU_TYPE_ETHL4_MISC_I = 0x2d, + DR_STE_V0_LU_TYPE_ETHL4_MISC_D = 0x2e, + DR_STE_V0_LU_TYPE_MPLS_FIRST_O = 0x15, + DR_STE_V0_LU_TYPE_MPLS_FIRST_I = 0x24, + DR_STE_V0_LU_TYPE_MPLS_FIRST_D = 0x25, + DR_STE_V0_LU_TYPE_GRE = 0x16, + DR_STE_V0_LU_TYPE_FLEX_PARSER_0 = 0x22, + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 = 0x23, + DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x19, + DR_STE_V0_LU_TYPE_GENERAL_PURPOSE = 0x18, + DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0 = 0x2f, + DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1 = 0x30, + DR_STE_V0_LU_TYPE_TUNNEL_HEADER = 0x34, + DR_STE_V0_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, +}; + +enum { + DR_STE_V0_ACTION_MDFY_FLD_L2_0 = 0, + DR_STE_V0_ACTION_MDFY_FLD_L2_1 = 1, + DR_STE_V0_ACTION_MDFY_FLD_L2_2 = 2, + DR_STE_V0_ACTION_MDFY_FLD_L3_0 = 3, + DR_STE_V0_ACTION_MDFY_FLD_L3_1 = 4, + DR_STE_V0_ACTION_MDFY_FLD_L3_2 = 5, + DR_STE_V0_ACTION_MDFY_FLD_L3_3 = 6, + DR_STE_V0_ACTION_MDFY_FLD_L3_4 = 7, + DR_STE_V0_ACTION_MDFY_FLD_L4_0 = 8, + DR_STE_V0_ACTION_MDFY_FLD_L4_1 = 9, + DR_STE_V0_ACTION_MDFY_FLD_MPLS = 10, + DR_STE_V0_ACTION_MDFY_FLD_L2_TNL_0 = 11, + DR_STE_V0_ACTION_MDFY_FLD_REG_0 = 12, + DR_STE_V0_ACTION_MDFY_FLD_REG_1 = 13, + DR_STE_V0_ACTION_MDFY_FLD_REG_2 = 14, + DR_STE_V0_ACTION_MDFY_FLD_REG_3 = 15, + DR_STE_V0_ACTION_MDFY_FLD_L4_2 = 16, + DR_STE_V0_ACTION_MDFY_FLD_FLEX_0 = 17, + DR_STE_V0_ACTION_MDFY_FLD_FLEX_1 = 18, + DR_STE_V0_ACTION_MDFY_FLD_FLEX_2 = 19, + DR_STE_V0_ACTION_MDFY_FLD_FLEX_3 = 20, + DR_STE_V0_ACTION_MDFY_FLD_L2_TNL_1 = 21, + DR_STE_V0_ACTION_MDFY_FLD_METADATA = 22, + DR_STE_V0_ACTION_MDFY_FLD_RESERVED = 23, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v0_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_1, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_2, .start = 32, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_0, .start = 16, .end = 47, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_0, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 0, .end = 5, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 48, .end = 56, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_1, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_3, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_4, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_4, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_2, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L3_0, .start = 32, .end = 63, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_METADATA, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_METADATA, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_0, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_2, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_REG_2, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_1, .start = 32, .end = 63, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L4_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V0_ACTION_MDFY_FLD_L2_2, .start = 0, .end = 15, + }, +}; + +static void dr_ste_v0_set_entry_type(u8 *hw_ste_p, u8 entry_type) +{ + MLX5_SET(ste_general, hw_ste_p, entry_type, entry_type); +} + +static u8 dr_ste_v0_get_entry_type(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, entry_type); +} + +static void dr_ste_v0_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +{ + u64 index = miss_addr >> 6; + + /* Miss address for TX and RX STEs located in the same offsets */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32, index >> 26); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6, index); +} + +static u64 dr_ste_v0_get_miss_addr(u8 *hw_ste_p) +{ + u64 index = + ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6) | + ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32)) << 26); + + return index << 6; +} + +static void dr_ste_v0_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) +{ + MLX5_SET(ste_general, hw_ste_p, byte_mask, byte_mask); +} + +static u16 dr_ste_v0_get_byte_mask(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, byte_mask); +} + +static void dr_ste_v0_set_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_general, hw_ste_p, entry_sub_type, lu_type); +} + +static void dr_ste_v0_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_general, hw_ste_p, next_lu_type, lu_type); +} + +static u16 dr_ste_v0_get_next_lu_type(u8 *hw_ste_p) +{ + return MLX5_GET(ste_general, hw_ste_p, next_lu_type); +} + +static void dr_ste_v0_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) +{ + MLX5_SET(ste_general, hw_ste_p, next_table_base_63_48, gvmi); +} + +static void dr_ste_v0_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) +{ + u64 index = (icm_addr >> 5) | ht_size; + + MLX5_SET(ste_general, hw_ste_p, next_table_base_39_32_size, index >> 27); + MLX5_SET(ste_general, hw_ste_p, next_table_base_31_5_size, index); +} + +static void dr_ste_v0_init_full(u8 *hw_ste_p, u16 lu_type, + enum dr_ste_v0_entry_type entry_type, u16 gvmi) +{ + dr_ste_v0_set_entry_type(hw_ste_p, entry_type); + dr_ste_v0_set_lu_type(hw_ste_p, lu_type); + dr_ste_v0_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); + + /* Set GVMI once, this is the same for RX/TX + * bits 63_48 of next table base / miss address encode the next GVMI + */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, gvmi, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, next_table_base_63_48, gvmi); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi); +} + +static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type, + bool is_rx, u16 gvmi) +{ + enum dr_ste_v0_entry_type entry_type; + + entry_type = is_rx ? DR_STE_TYPE_RX : DR_STE_TYPE_TX; + dr_ste_v0_init_full(hw_ste_p, lu_type, entry_type, gvmi); +} + +static void dr_ste_v0_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer, + DR_STE_ENABLE_FLOW_TAG | flow_tag); +} + +static void dr_ste_v0_set_counter_id(u8 *hw_ste_p, u32 ctr_id) +{ + /* This can be used for both rx_steering_mult and for sx_transmit */ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_15_0, ctr_id); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, counter_trigger_23_16, ctr_id >> 16); +} + +static void dr_ste_v0_set_go_back_bit(u8 *hw_ste_p) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, go_back, 1); +} + +static void dr_ste_v0_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr, + bool go_back) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + DR_STE_ACTION_TYPE_PUSH_VLAN); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr); + /* Due to HW limitation we need to set this bit, otherwise reformat + + * push vlan will not work. + */ + if (go_back) + dr_ste_v0_set_go_back_bit(hw_ste_p); +} + +static void dr_ste_v0_set_tx_encap(void *hw_ste_p, u32 reformat_id, + int size, bool encap_l3) +{ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, + encap_l3 ? DR_STE_ACTION_TYPE_ENCAP_L3 : DR_STE_ACTION_TYPE_ENCAP); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_sx_transmit, hw_ste_p, action_description, size / 2); + MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, reformat_id); +} + +static void dr_ste_v0_set_rx_decap(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_DECAP); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1); +} + +static void dr_ste_v0_set_rx_pop_vlan(u8 *hw_ste_p) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_POP_VLAN); +} + +static void dr_ste_v0_set_rx_decap_l3(u8 *hw_ste_p, bool vlan) +{ + MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, + DR_STE_TUNL_ACTION_L3_DECAP); + MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1); +} + +static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, + u32 re_write_index) +{ + MLX5_SET(ste_modify_packet, hw_ste_p, number_of_re_write_actions, + num_of_actions); + MLX5_SET(ste_modify_packet, hw_ste_p, header_re_write_actions_pointer, + re_write_index); +} + +static void dr_ste_v0_arr_init_next(u8 **last_ste, + u32 *added_stes, + enum dr_ste_v0_entry_type entry_type, + u16 gvmi) +{ + (*added_stes)++; + *last_ste += DR_STE_SIZE; + dr_ste_v0_init_full(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, + entry_type, gvmi); +} + +static void +dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + bool encap = action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2] || + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]; + + /* We want to make sure the modify header comes before L2 + * encapsulation. The reason for that is that we support + * modify headers for outer headers only + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) { + dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT); + dr_ste_v0_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR]) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_TX, + attr->gvmi); + + dr_ste_v0_set_tx_push_vlan(last_ste, + attr->vlans.headers[i], + encap); + } + } + + if (encap) { + /* Modify header and encapsulation require a different STEs. + * Since modify header STE format doesn't support encapsulation + * tunneling_action. + */ + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] || + action_type_set[DR_ACTION_TYP_PUSH_VLAN]) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_TX, + attr->gvmi); + + dr_ste_v0_set_tx_encap(last_ste, + attr->reformat.id, + attr->reformat.size, + action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]); + /* Whenever prio_tag_required enabled, we can be sure that the + * previous table (ACL) already push vlan to our packet, + * And due to HW limitation we need to set this bit, otherwise + * push vlan + reformat will not work. + */ + if (MLX5_CAP_GEN(dmn->mdev, prio_tag_required)) + dr_ste_v0_set_go_back_bit(last_ste); + } + + if (action_type_set[DR_ACTION_TYP_CTR]) + dr_ste_v0_set_counter_id(last_ste, attr->ctr_id); + + dr_ste_v0_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v0_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +static void +dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + if (action_type_set[DR_ACTION_TYP_CTR]) + dr_ste_v0_set_counter_id(last_ste, attr->ctr_id); + + if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { + dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT); + dr_ste_v0_set_rx_decap_l3(last_ste, attr->decap_with_vlan); + dr_ste_v0_set_rewrite_actions(last_ste, + attr->decap_actions, + attr->decap_index); + } + + if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) + dr_ste_v0_set_rx_decap(last_ste); + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (i || + action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2] || + action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_RX, + attr->gvmi); + + dr_ste_v0_set_rx_pop_vlan(last_ste); + } + } + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR] && attr->modify_actions) { + if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_MODIFY_PKT, + attr->gvmi); + else + dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT); + + dr_ste_v0_set_rewrite_actions(last_ste, + attr->modify_actions, + attr->modify_index); + } + + if (action_type_set[DR_ACTION_TYP_TAG]) { + if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT) + dr_ste_v0_arr_init_next(&last_ste, + added_stes, + DR_STE_TYPE_RX, + attr->gvmi); + + dr_ste_v0_rx_set_flow_tag(last_ste, attr->flow_tag); + } + + dr_ste_v0_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v0_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +static void dr_ste_v0_set_action_set(u8 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + length = (length == 32) ? 0 : length; + MLX5_SET(dr_action_hw_set, hw_action, opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_field); + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, shifter); + MLX5_SET(dr_action_hw_set, hw_action, destination_length, length); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, data); +} + +static void dr_ste_v0_set_action_add(u8 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + length = (length == 32) ? 0 : length; + MLX5_SET(dr_action_hw_set, hw_action, opcode, DR_STE_ACTION_MDFY_OP_ADD); + MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_field); + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, shifter); + MLX5_SET(dr_action_hw_set, hw_action, destination_length, length); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, data); +} + +static void dr_ste_v0_set_action_copy(u8 *hw_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) +{ + MLX5_SET(dr_action_hw_copy, hw_action, opcode, DR_STE_ACTION_MDFY_OP_COPY); + MLX5_SET(dr_action_hw_copy, hw_action, destination_field_code, dst_hw_field); + MLX5_SET(dr_action_hw_copy, hw_action, destination_left_shifter, dst_shifter); + MLX5_SET(dr_action_hw_copy, hw_action, destination_length, dst_len); + MLX5_SET(dr_action_hw_copy, hw_action, source_field_code, src_hw_field); + MLX5_SET(dr_action_hw_copy, hw_action, source_left_shifter, src_shifter); +} + +#define DR_STE_DECAP_L3_MIN_ACTION_NUM 5 + +static int +dr_ste_v0_set_action_decap_l3_list(void *data, u32 data_sz, + u8 *hw_action, u32 hw_action_sz, + u16 *used_hw_action_num) +{ + struct mlx5_ifc_l2_hdr_bits *l2_hdr = data; + u32 hw_action_num; + int required_actions; + u32 hdr_fld_4b; + u16 hdr_fld_2b; + u16 vlan_type; + bool vlan; + + vlan = (data_sz != HDR_LEN_L2); + hw_action_num = hw_action_sz / MLX5_ST_SZ_BYTES(dr_action_hw_set); + required_actions = DR_STE_DECAP_L3_MIN_ACTION_NUM + !!vlan; + + if (hw_action_num < required_actions) + return -ENOMEM; + + /* dmac_47_16 */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 0); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_0); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 16); + hdr_fld_4b = MLX5_GET(l2_hdr, l2_hdr, dmac_47_16); + MLX5_SET(dr_action_hw_set, hw_action, + inline_data, hdr_fld_4b); + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + /* smac_47_16 */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 0); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_1); + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, 16); + hdr_fld_4b = (MLX5_GET(l2_hdr, l2_hdr, smac_31_0) >> 16 | + MLX5_GET(l2_hdr, l2_hdr, smac_47_32) << 16); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_4b); + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + /* dmac_15_0 */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 16); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_0); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, dmac_15_0); + MLX5_SET(dr_action_hw_set, hw_action, + inline_data, hdr_fld_2b); + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + /* ethertype + (optional) vlan */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_2); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 32); + if (!vlan) { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, hw_action, destination_length, 16); + } else { + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, ethertype); + vlan_type = hdr_fld_2b == SVLAN_ETHERTYPE ? DR_STE_SVLAN : DR_STE_CVLAN; + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan); + hdr_fld_4b = (vlan_type << 16) | hdr_fld_2b; + MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_4b); + MLX5_SET(dr_action_hw_set, hw_action, destination_length, 18); + } + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + /* smac_15_0 */ + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 16); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_1); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 0); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, smac_31_0); + MLX5_SET(dr_action_hw_set, hw_action, inline_data, hdr_fld_2b); + hw_action += MLX5_ST_SZ_BYTES(dr_action_hw_set); + + if (vlan) { + MLX5_SET(dr_action_hw_set, hw_action, + opcode, DR_STE_ACTION_MDFY_OP_SET); + hdr_fld_2b = MLX5_GET(l2_hdr, l2_hdr, vlan_type); + MLX5_SET(dr_action_hw_set, hw_action, + inline_data, hdr_fld_2b); + MLX5_SET(dr_action_hw_set, hw_action, + destination_length, 16); + MLX5_SET(dr_action_hw_set, hw_action, + destination_field_code, DR_STE_V0_ACTION_MDFY_FLD_L2_2); + MLX5_SET(dr_action_hw_set, hw_action, + destination_left_shifter, 0); + } + + *used_hw_action_num = required_actions; + + return 0; +} + +static void +dr_ste_v0_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + if (mask->smac_47_16 || mask->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_47_32, + mask->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, bit_mask, smac_31_0, + mask->smac_47_16 << 16 | mask->smac_15_0); + mask->smac_47_16 = 0; + mask->smac_15_0 = 0; + } + + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_ONES(eth_l2_src_dst, bit_mask, l3_type, mask, ip_version); + + if (mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + } else if (mask->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, bit_mask, first_vlan_qualifier, -1); + mask->svlan_tag = 0; + } +} + +static int +dr_ste_v0_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst, tag, dmac_15_0, spec, dmac_15_0); + + if (spec->smac_47_16 || spec->smac_15_0) { + MLX5_SET(ste_eth_l2_src_dst, tag, smac_47_32, + spec->smac_47_16 >> 16); + MLX5_SET(ste_eth_l2_src_dst, tag, smac_31_0, + spec->smac_47_16 << 16 | spec->smac_15_0); + spec->smac_47_16 = 0; + spec->smac_15_0 = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_dst, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst, tag, first_priority, spec, first_prio); + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + return 0; +} + +static void +dr_ste_v0_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC_DST, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_src_dst_tag; +} + +static int +dr_ste_v0_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0); + + return 0; +} + +static void +dr_ste_v0_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_DST, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv6_dst_tag; +} + +static int +dr_ste_v0_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0); + + return 0; +} + +static void +dr_ste_v0_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV6_SRC, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv6_src_tag; +} + +static int +dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_address, spec, dst_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_address, spec, src_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, destination_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, source_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple, tag, ecn, spec, ip_ecn); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +static void +dr_ste_v0_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_5_TUPLE, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv4_5_tuple_tag; +} + +static void +dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_TAG(eth_l2_src, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_src, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_src, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_src, bit_mask, l3_type, mask, ip_version); + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } + + if (inner) { + if (misc_mask->inner_second_cvlan_tag || + misc_mask->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->inner_second_cvlan_tag = 0; + misc_mask->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_cfi, misc_mask, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_priority, misc_mask, inner_second_prio); + } else { + if (misc_mask->outer_second_cvlan_tag || + misc_mask->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, bit_mask, second_vlan_qualifier, -1); + misc_mask->outer_second_cvlan_tag = 0; + misc_mask->outer_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_vlan_id, misc_mask, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_cfi, misc_mask, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src, bit_mask, + second_priority, misc_mask, outer_second_prio); + } +} + +static int +dr_ste_v0_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, + bool inner, u8 *tag) +{ + struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc_spec = &value->misc; + + DR_STE_SET_TAG(eth_l2_src, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_src, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_src, tag, l3_ethertype, spec, ethertype); + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (inner) { + if (misc_spec->inner_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->inner_second_cvlan_tag = 0; + } else if (misc_spec->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, inner_second_prio); + } else { + if (misc_spec->outer_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->outer_second_cvlan_tag = 0; + } else if (misc_spec->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->outer_second_svlan_tag = 0; + } + DR_STE_SET_TAG(eth_l2_src, tag, second_vlan_id, misc_spec, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src, tag, second_cfi, misc_spec, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src, tag, second_priority, misc_spec, outer_second_prio); + } + + return 0; +} + +static void +dr_ste_v0_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_TAG(eth_l2_src, bit_mask, smac_15_0, mask, smac_15_0); + + dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int +dr_ste_v0_build_eth_l2_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src, tag, smac_15_0, spec, smac_15_0); + + return dr_ste_v0_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +static void +dr_ste_v0_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask); + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_SRC, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_src_tag; +} + +static void +dr_ste_v0_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst, bit_mask, dmac_15_0, mask, dmac_15_0); + + dr_ste_v0_build_eth_l2_src_or_dst_bit_mask(value, sb->inner, bit_mask); +} + +static int +dr_ste_v0_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst, tag, dmac_15_0, spec, dmac_15_0); + + return dr_ste_v0_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +static void +dr_ste_v0_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l2_dst_bit_mask(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL2_DST, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_dst_tag; +} + +static void +dr_ste_v0_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, dmac_15_0, mask, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_tnl, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_tnl, bit_mask, l3_type, mask, ip_version); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, + l2_tunneling_network_id, (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } +} + +static int +dr_ste_v0_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl, tag, dmac_15_0, spec, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_tnl, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_tnl, tag, l3_ethertype, spec, ethertype); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl, tag, l2_tunneling_network_id, + (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_tnl, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (spec->ip_version) { + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_tnl, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else { + return -EINVAL; + } + } + + return 0; +} + +static void +dr_ste_v0_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_ETHL2_TUNNELING_I; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l2_tnl_tag; +} + +static int +dr_ste_v0_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, time_to_live, spec, ttl_hoplimit); + DR_STE_SET_TAG(eth_l3_ipv4_misc, tag, ihl, spec, ipv4_ihl); + + return 0; +} + +static void +dr_ste_v0_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL3_IPV4_MISC, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l3_ipv4_misc_tag; +} + +static int +dr_ste_v0_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l4, tag, src_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l4, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l4, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l4, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn); + DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit); + + if (sb->inner) + DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, inner_ipv6_flow_label); + else + DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, outer_ipv6_flow_label); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +static void +dr_ste_v0_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_ipv6_l3_l4_tag; +} + +static int +dr_ste_v0_build_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + if (sb->inner) + DR_STE_SET_MPLS(mpls, misc2, inner, tag); + else + DR_STE_SET_MPLS(mpls, misc2, outer, tag); + + return 0; +} + +static void +dr_ste_v0_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(MPLS_FIRST, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_mpls_tag; +} + +static int +dr_ste_v0_build_tnl_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(gre, tag, gre_protocol, misc, gre_protocol); + + DR_STE_SET_TAG(gre, tag, gre_k_present, misc, gre_k_present); + DR_STE_SET_TAG(gre, tag, gre_key_h, misc, gre_key_h); + DR_STE_SET_TAG(gre, tag, gre_key_l, misc, gre_key_l); + + DR_STE_SET_TAG(gre, tag, gre_c_present, misc, gre_c_present); + + DR_STE_SET_TAG(gre, tag, gre_s_present, misc, gre_s_present); + + return 0; +} + +static void +dr_ste_v0_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gre_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_GRE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gre_tag; +} + +static int +dr_ste_v0_build_tnl_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc_2 = &value->misc2; + u32 mpls_hdr; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2)) { + mpls_hdr = misc_2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc_2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc_2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc_2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc_2->outer_first_mpls_over_gre_ttl = 0; + } else { + mpls_hdr = misc_2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc_2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc_2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc_2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc_2->outer_first_mpls_over_udp_ttl = 0; + } + + MLX5_SET(ste_flex_parser_0, tag, flex_parser_3, mpls_hdr); + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_tag; +} + +static int +dr_ste_v0_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_udp_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_udp; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_udp_tag; +} + +static int +dr_ste_v0_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_gre_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_gre; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_gre_tag; +} + +#define ICMP_TYPE_OFFSET_FIRST_DW 24 +#define ICMP_CODE_OFFSET_FIRST_DW 16 + +static int +dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc_3 = &value->misc3; + u32 *icmp_header_data; + int dw0_location; + int dw1_location; + u8 *parser_ptr; + u8 *icmp_type; + u8 *icmp_code; + bool is_ipv4; + u32 icmp_hdr; + + is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3); + if (is_ipv4) { + icmp_header_data = &misc_3->icmpv4_header_data; + icmp_type = &misc_3->icmpv4_type; + icmp_code = &misc_3->icmpv4_code; + dw0_location = sb->caps->flex_parser_id_icmp_dw0; + dw1_location = sb->caps->flex_parser_id_icmp_dw1; + } else { + icmp_header_data = &misc_3->icmpv6_header_data; + icmp_type = &misc_3->icmpv6_type; + icmp_code = &misc_3->icmpv6_code; + dw0_location = sb->caps->flex_parser_id_icmpv6_dw0; + dw1_location = sb->caps->flex_parser_id_icmpv6_dw1; + } + + parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw0_location); + icmp_hdr = (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) | + (*icmp_code << ICMP_CODE_OFFSET_FIRST_DW); + *(__be32 *)parser_ptr = cpu_to_be32(icmp_hdr); + *icmp_code = 0; + *icmp_type = 0; + + parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw1_location); + *(__be32 *)parser_ptr = cpu_to_be32(*icmp_header_data); + *icmp_header_data = 0; + + return 0; +} + +static void +dr_ste_v0_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + u8 parser_id; + bool is_ipv4; + + dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + is_ipv4 = DR_MASK_IS_ICMPV4_SET(&mask->misc3); + parser_id = is_ipv4 ? sb->caps->flex_parser_id_icmp_dw0 : + sb->caps->flex_parser_id_icmpv6_dw0; + sb->lu_type = parser_id > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_icmp_tag; +} + +static int +dr_ste_v0_build_general_purpose_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc_2 = &value->misc2; + + DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, + misc_2, metadata_reg_a); + + return 0; +} + +static void +dr_ste_v0_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_general_purpose_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_GENERAL_PURPOSE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_general_purpose_tag; +} + +static int +dr_ste_v0_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + if (sb->inner) { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, inner_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, inner_tcp_ack_num); + } else { + DR_STE_SET_TAG(eth_l4_misc, tag, seq_num, misc3, outer_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc, tag, ack_num, misc3, outer_tcp_ack_num); + } + + return 0; +} + +static void +dr_ste_v0_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_eth_l4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_LU_TYPE(ETHL4_MISC, sb->rx, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_eth_l4_misc_tag; +} + +static int +dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_flags, misc3, + outer_vxlan_gpe_flags); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_next_protocol, misc3, + outer_vxlan_gpe_next_protocol); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_vni, misc3, + outer_vxlan_gpe_vni); + + return 0; +} + +static void +dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask); + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_tag; +} + +static int +dr_ste_v0_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_protocol_type, misc, geneve_protocol_type); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_oam, misc, geneve_oam); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_opt_len, misc, geneve_opt_len); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_vni, misc, geneve_vni); + + return 0; +} + +static void +dr_ste_v0_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask); + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tag; +} + +static int +dr_ste_v0_build_register_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); + DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); + DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2); + DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3); + + return 0; +} + +static void +dr_ste_v0_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_register_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_register_0_tag; +} + +static int +dr_ste_v0_build_register_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); + DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); + DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6); + DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7); + + return 0; +} + +static void +dr_ste_v0_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_register_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_register_1_tag; +} + +static void +dr_ste_v0_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_ONES(src_gvmi_qp, bit_mask, source_gvmi, misc_mask, source_port); + DR_STE_SET_ONES(src_gvmi_qp, bit_mask, source_qp, misc_mask, source_sqn); + misc_mask->source_eswitch_owner_vhca_id = 0; +} + +static int +dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + int id = misc->source_eswitch_owner_vhca_id; + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *dmn = sb->dmn; + struct mlx5dr_domain *vport_dmn; + u8 *bit_mask = sb->bit_mask; + struct mlx5dr_domain *peer; + bool source_gvmi_set; + + DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); + + if (sb->vhca_id_valid) { + peer = xa_load(&dmn->peer_dmn_xa, id); + /* Find port GVMI based on the eswitch_owner_vhca_id */ + if (id == dmn->info.caps.gvmi) + vport_dmn = dmn; + else if (peer && (id == peer->info.caps.gvmi)) + vport_dmn = peer; + else + return -EINVAL; + + misc->source_eswitch_owner_vhca_id = 0; + } else { + vport_dmn = dmn; + } + + source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi); + if (source_gvmi_set) { + vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, + misc->source_port); + if (!vport_cap) { + mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n", + misc->source_port); + return -EINVAL; + } + + if (vport_cap->vport_gvmi) + MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); + + misc->source_port = 0; + } + + return 0; +} + +static void +dr_ste_v0_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_SRC_GVMI_AND_QP; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_src_gvmi_qpn_tag; +} + +static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id, + u32 *misc4_field_value, + bool *parser_is_used, + u8 *tag) +{ + u32 id = *misc4_field_id; + u8 *parser_ptr; + + if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id]) + return; + + parser_is_used[id] = true; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, id); + + *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value); + *misc4_field_id = 0; + *misc4_field_value = 0; +} + +static int dr_ste_v0_build_flex_parser_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4; + bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {}; + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_0, + &misc_4_mask->prog_sample_field_value_0, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_1, + &misc_4_mask->prog_sample_field_value_1, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_2, + &misc_4_mask->prog_sample_field_value_2, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_3, + &misc_4_mask->prog_sample_field_value_3, + parser_is_used, tag); + + return 0; +} + +static void dr_ste_v0_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag; +} + +static void dr_ste_v0_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag; +} + +static int +dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + + MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3, + misc3->geneve_tlv_option_0_data); + misc3->geneve_tlv_option_0_data = 0; + + return 0; +} + +static void +dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag; +} + +static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_msg_flags, misc3, + gtpu_msg_flags); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_msg_type, misc3, + gtpu_msg_type); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_teid, misc3, + gtpu_teid); + + return 0; +} + +static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_gtpu_tag; +} + +static int +dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag; +} + +static int +dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag; +} + +static int dr_ste_v0_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc5 *misc5 = &value->misc5; + + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0); + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1); + + return 0; +} + +static void dr_ste_v0_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_TUNNEL_HEADER; + dr_ste_v0_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_header_0_1_tag; +} + +static struct mlx5dr_ste_ctx ste_ctx_v0 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v0_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v0_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v0_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v0_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v0_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v0_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v0_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v0_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v0_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v0_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v0_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v0_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v0_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v0_build_icmp_init, + .build_general_purpose_init = &dr_ste_v0_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v0_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v0_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_register_0_init = &dr_ste_v0_build_register_0_init, + .build_register_1_init = &dr_ste_v0_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v0_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v0_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v0_init, + .set_next_lu_type = &dr_ste_v0_set_next_lu_type, + .get_next_lu_type = &dr_ste_v0_get_next_lu_type, + .set_miss_addr = &dr_ste_v0_set_miss_addr, + .get_miss_addr = &dr_ste_v0_get_miss_addr, + .set_hit_addr = &dr_ste_v0_set_hit_addr, + .set_byte_mask = &dr_ste_v0_set_byte_mask, + .get_byte_mask = &dr_ste_v0_get_byte_mask, + + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_NONE, + .set_actions_rx = &dr_ste_v0_set_actions_rx, + .set_actions_tx = &dr_ste_v0_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v0_action_modify_field_arr), + .modify_field_arr = dr_ste_v0_action_modify_field_arr, + .set_action_set = &dr_ste_v0_set_action_set, + .set_action_add = &dr_ste_v0_set_action_add, + .set_action_copy = &dr_ste_v0_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v0_set_action_decap_l3_list, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void) +{ + return &ste_ctx_v0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c new file mode 100644 index 0000000000..dd856cde18 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -0,0 +1,2341 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */ + +#include +#include "mlx5_ifc_dr_ste_v1.h" +#include "dr_ste_v1.h" + +#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ + ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \ + DR_STE_V1_LU_TYPE_##lookup_type##_O) + +enum dr_ste_v1_entry_format { + DR_STE_V1_TYPE_BWC_BYTE = 0x0, + DR_STE_V1_TYPE_BWC_DW = 0x1, + DR_STE_V1_TYPE_MATCH = 0x2, + DR_STE_V1_TYPE_MATCH_RANGES = 0x7, +}; + +/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */ +enum { + DR_STE_V1_LU_TYPE_NOP = 0x0000, + DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002, + DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102, + DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003, + DR_STE_V1_LU_TYPE_IBL4 = 0x0103, + DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004, + DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104, + DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105, + DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007, + DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008, + DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108, + DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009, + DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109, + DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a, + DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b, + DR_STE_V1_LU_TYPE_MPLS_O = 0x010b, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c, + DR_STE_V1_LU_TYPE_MPLS_I = 0x010c, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d, + DR_STE_V1_LU_TYPE_GRE = 0x010d, + DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e, + DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, + DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011, + DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, + DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, + DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114, + DR_STE_V1_LU_TYPE_INVALID = 0x00ff, + DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, +}; + +enum dr_ste_v1_header_anchors { + DR_STE_HEADER_ANCHOR_START_OUTER = 0x00, + DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02, + DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07, + DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19, +}; + +enum dr_ste_v1_action_size { + DR_STE_ACTION_SINGLE_SZ = 4, + DR_STE_ACTION_DOUBLE_SZ = 8, + DR_STE_ACTION_TRIPLE_SZ = 12, +}; + +enum dr_ste_v1_action_insert_ptr_attr { + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. push vlan) */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */ +}; + +enum dr_ste_v1_action_id { + DR_STE_V1_ACTION_ID_NOP = 0x00, + DR_STE_V1_ACTION_ID_COPY = 0x05, + DR_STE_V1_ACTION_ID_SET = 0x06, + DR_STE_V1_ACTION_ID_ADD = 0x07, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09, + DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a, + DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b, + DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c, + DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d, + DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e, + DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f, + DR_STE_V1_ACTION_ID_ASO = 0x12, + DR_STE_V1_ACTION_ID_TRAILER = 0x13, + DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14, + DR_STE_V1_ACTION_ID_MAX = 0x21, + /* use for special cases */ + DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22, +}; + +enum { + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91, +}; + +enum dr_ste_v1_aso_ctx_type { + DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, +}; + +static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type); +} + +bool dr_ste_v1_is_miss_addr_set(u8 *hw_ste_p) +{ + u8 entry_type = MLX5_GET(ste_match_bwc_v1, hw_ste_p, entry_format); + + /* unlike MATCH STE, for MATCH_RANGES STE both hit and miss addresses + * are part of the action, so they both set as part of STE init + */ + return entry_type == DR_STE_V1_TYPE_MATCH_RANGES; +} + +void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +{ + u64 index = miss_addr >> 6; + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32, index >> 26); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6, index); +} + +u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) +{ + u64 index = + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32)) << 26); + + return index << 6; +} + +void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, byte_mask, byte_mask); +} + +u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p) +{ + return MLX5_GET(ste_match_bwc_v1, hw_ste_p, byte_mask); +} + +static void dr_ste_v1_set_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, lu_type >> 8); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, match_definer_ctx_idx, lu_type & 0xFF); +} + +void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_entry_format, lu_type >> 8); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx, lu_type & 0xFF); +} + +u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p) +{ + u8 mode = MLX5_GET(ste_match_bwc_v1, hw_ste_p, next_entry_format); + u8 index = MLX5_GET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx); + + return (mode << 8 | index); +} + +static void dr_ste_v1_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi); +} + +void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) +{ + u64 index = (icm_addr >> 5) | ht_size; + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_39_32_size, index >> 27); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_31_5_size, index); +} + +void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi) +{ + dr_ste_v1_set_lu_type(hw_ste_p, lu_type); + dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, gvmi, gvmi); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi); +} + +void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size) +{ + u8 *tag = hw_ste_p + DR_STE_SIZE_CTRL; + u8 *mask = tag + DR_STE_SIZE_TAG; + u8 tmp_tag[DR_STE_SIZE_TAG] = {}; + + if (ste_size == DR_STE_SIZE_CTRL) + return; + + WARN_ON(ste_size != DR_STE_SIZE); + + /* Backup tag */ + memcpy(tmp_tag, tag, DR_STE_SIZE_TAG); + + /* Swap mask and tag both are the same size */ + memcpy(tag, mask, DR_STE_SIZE_MASK); + memcpy(mask, tmp_tag, DR_STE_SIZE_TAG); +} + +static void dr_ste_v1_set_rx_flow_tag(u8 *s_action, u32 flow_tag) +{ + MLX5_SET(ste_single_action_flow_tag_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_FLOW_TAG); + MLX5_SET(ste_single_action_flow_tag_v1, s_action, flow_tag, flow_tag); +} + +static void dr_ste_v1_set_counter_id(u8 *hw_ste_p, u32 ctr_id) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, counter_id, ctr_id); +} + +static void dr_ste_v1_set_reparse(u8 *hw_ste_p) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1); +} + +static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, + u8 anchor, u8 offset, + int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_offset, offset / 2); + + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, + int size) +{ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, remove_size, size / 2); + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_offset, offset / 2); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, + u32 vlan_hdr) +{ + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects offset to vlan header in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + start_offset, HDR_LEN_L2_MACS >> 1); + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + inline_data, vlan_hdr); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) +{ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + start_anchor, DR_STE_HEADER_ANCHOR_1ST_VLAN); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + remove_size, (HDR_LEN_L2_VLAN >> 1) * vlans_num); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, + u8 *frst_s_action, + u8 *scnd_d_action, + u32 reformat_id, + int size) +{ + /* Remove L2 headers */ + MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, end_anchor, + DR_STE_HEADER_ANCHOR_IPV6_IPV4); + + /* Encapsulate with given reformat ID */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) +{ + MLX5_SET(ste_single_action_remove_header_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, s_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v1, s_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v1, s_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_MAC); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_accelerated_rewrite_actions(u8 *hw_ste_p, + u8 *d_action, + u16 num_of_actions, + u32 rewrite_pattern, + u32 rewrite_args, + u8 *action_data) +{ + if (action_data) { + memcpy(d_action, action_data, DR_MODIFY_ACTION_SIZE); + } else { + MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_ACCELERATED_LIST); + MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action, + modify_actions_pattern_pointer, rewrite_pattern); + MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action, + number_of_modify_actions, num_of_actions); + MLX5_SET(ste_double_action_accelerated_modify_action_list_v1, d_action, + modify_actions_argument_pointer, rewrite_args); + } + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_basic_rewrite_actions(u8 *hw_ste_p, + u8 *s_action, + u16 num_of_actions, + u32 rewrite_index) +{ + MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_MODIFY_LIST); + MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions, + num_of_actions); + MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr, + rewrite_index); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, + u8 *action, + u16 num_of_actions, + u32 rewrite_pattern, + u32 rewrite_args, + u8 *action_data) +{ + if (rewrite_pattern != MLX5DR_INVALID_PATTERN_INDEX) + return dr_ste_v1_set_accelerated_rewrite_actions(hw_ste_p, + action, + num_of_actions, + rewrite_pattern, + rewrite_args, + action_data); + + /* fall back to the code that doesn't support accelerated modify header */ + return dr_ste_v1_set_basic_rewrite_actions(hw_ste_p, + action, + num_of_actions, + rewrite_args); +} + +static void dr_ste_v1_set_aso_flow_meter(u8 *d_action, + u32 object_id, + u32 offset, + u8 dest_reg_id, + u8 init_color) +{ + MLX5_SET(ste_double_action_aso_v1, d_action, action_id, + DR_STE_V1_ACTION_ID_ASO); + MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_number, + object_id + (offset / MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ)); + /* Convert reg_c index to HW 64bit index */ + MLX5_SET(ste_double_action_aso_v1, d_action, dest_reg_id, + (dest_reg_id - 1) / 2); + MLX5_SET(ste_double_action_aso_v1, d_action, aso_context_type, + DR_STE_V1_ASO_CTX_TYPE_POLICERS); + MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.line_id, + offset % MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ); + MLX5_SET(ste_double_action_aso_v1, d_action, flow_meter.initial_color, + init_color); +} + +static void dr_ste_v1_set_match_range_pkt_len(u8 *hw_ste_p, u32 definer_id, + u32 min, u32 max) +{ + MLX5_SET(ste_match_ranges_v1, hw_ste_p, match_definer_ctx_idx, definer_id); + + /* When the STE will be sent, its mask and tags will be swapped in + * dr_ste_v1_prepare_for_postsend(). This, however, is match range STE + * which doesn't have mask, and shouldn't have mask/tag swapped. + * We're using the common utilities functions to send this STE, so need + * to allow for this swapping - place the values in the corresponding + * locations to allow flipping them when writing to ICM. + * + * min/max_value_2 corresponds to match_dw_0 in its definer. + * To allow mask/tag swapping, writing the min/max_2 to min/max_0. + * + * Pkt len is 2 bytes that are stored in the higher section of the DW. + */ + MLX5_SET(ste_match_ranges_v1, hw_ste_p, min_value_0, min << 16); + MLX5_SET(ste_match_ranges_v1, hw_ste_p, max_value_0, max << 16); +} + +static void dr_ste_v1_arr_init_next_match(u8 **last_ste, + u32 *added_stes, + u16 gvmi) +{ + u8 *action; + + (*added_stes)++; + *last_ste += DR_STE_SIZE; + dr_ste_v1_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, 0, gvmi); + dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH); + + action = MLX5_ADDR_OF(ste_mask_and_match_v1, *last_ste, action); + memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action)); +} + +static void dr_ste_v1_arr_init_next_match_range(u8 **last_ste, + u32 *added_stes, + u16 gvmi) +{ + dr_ste_v1_arr_init_next_match(last_ste, added_stes, gvmi); + dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH_RANGES); +} + +void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); + u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; + bool allow_modify_hdr = true; + bool allow_encap = true; + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, + attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, + last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + + /* Check if vlan_pop and modify_hdr on same STE is supported */ + if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY)) + allow_modify_hdr = false; + } + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, + attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, + last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->modify_actions, + attr->modify_pat_idx, + attr->modify_index, + attr->single_modify_action); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_encap = false; + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ || !allow_encap) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_encap = true; + } + dr_ste_v1_set_push_vlan(last_ste, action, + attr->vlans.headers[i]); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + } + + if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { + if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_encap = true; + } + dr_ste_v1_set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { + u8 *d_action; + + if (action_sz < DR_STE_ACTION_TRIPLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + d_action = action + DR_STE_ACTION_SINGLE_SZ; + + dr_ste_v1_set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_TRIPLE_SZ; + action += DR_STE_ACTION_TRIPLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { + if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_aso_flow_meter(action, + attr->aso_flow_meter.obj_id, + attr->aso_flow_meter.offset, + attr->aso_flow_meter.dest_reg_id, + attr->aso_flow_meter.init_color); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_RANGE]) { + /* match ranges requires a new STE of its own type */ + dr_ste_v1_arr_init_next_match_range(&last_ste, added_stes, attr->gvmi); + dr_ste_v1_set_miss_addr(last_ste, attr->range.miss_icm_addr); + + /* we do not support setting any action on the match ranges STE */ + action_sz = 0; + + dr_ste_v1_set_match_range_pkt_len(last_ste, + attr->range.definer_id, + attr->range.min, + attr->range.max); + } + + /* set counter ID on the last STE to adhere to DMFS behavior */ + if (action_type_set[DR_ACTION_TYP_CTR]) + dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u32 actions_caps, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); + u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; + bool allow_modify_hdr = true; + bool allow_ctr = true; + + if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->decap_actions, + attr->decap_pat_idx, + attr->decap_index, + NULL); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + allow_ctr = false; + } else if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) { + dr_ste_v1_set_rx_decap(last_ste, action); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + allow_modify_hdr = false; + allow_ctr = false; + } + + if (action_type_set[DR_ACTION_TYP_TAG]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = true; + } + dr_ste_v1_set_rx_flow_tag(action, attr->flow_tag); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ || + !allow_modify_hdr) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + + dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + allow_ctr = false; + + /* Check if vlan_pop and modify_hdr on same STE is supported */ + if (!(actions_caps & DR_STE_CTX_ACTION_CAP_POP_MDFY)) + allow_modify_hdr = false; + } + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + /* Modify header and decapsulation must use different STEs */ + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = true; + } + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->modify_actions, + attr->modify_pat_idx, + attr->modify_index, + attr->single_modify_action); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ || + !allow_modify_hdr) { + dr_ste_v1_arr_init_next_match(&last_ste, + added_stes, + attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, + last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_push_vlan(last_ste, action, + attr->vlans.headers[i]); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + } + + if (action_type_set[DR_ACTION_TYP_CTR]) { + /* Counter action set after decap and before insert_hdr + * to exclude decaped / encaped header respectively. + */ + if (!allow_ctr) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + } + dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + allow_ctr = false; + } + + if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { + u8 *d_action; + + if (action_sz < DR_STE_ACTION_TRIPLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + + d_action = action + DR_STE_ACTION_SINGLE_SZ; + + dr_ste_v1_set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { + /* Modify header, decap, and encap must use different STEs */ + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = true; + } + dr_ste_v1_set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_ASO_FLOW_METER]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_aso_flow_meter(action, + attr->aso_flow_meter.obj_id, + attr->aso_flow_meter.offset, + attr->aso_flow_meter.dest_reg_id, + attr->aso_flow_meter.init_color); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_RANGE]) { + /* match ranges requires a new STE of its own type */ + dr_ste_v1_arr_init_next_match_range(&last_ste, added_stes, attr->gvmi); + dr_ste_v1_set_miss_addr(last_ste, attr->range.miss_icm_addr); + + /* we do not support setting any action on the match ranges STE */ + action_sz = 0; + + dr_ste_v1_set_match_range_pkt_len(last_ste, + attr->range.definer_id, + attr->range.min, + attr->range.max); + } + + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +void dr_ste_v1_set_action_set(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_set_v1, d_action, action_id, DR_STE_V1_ACTION_ID_SET); + MLX5_SET(ste_double_action_set_v1, d_action, destination_dw_offset, hw_field); + MLX5_SET(ste_double_action_set_v1, d_action, destination_left_shifter, shifter); + MLX5_SET(ste_double_action_set_v1, d_action, destination_length, length); + MLX5_SET(ste_double_action_set_v1, d_action, inline_data, data); +} + +void dr_ste_v1_set_action_add(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_add_v1, d_action, action_id, DR_STE_V1_ACTION_ID_ADD); + MLX5_SET(ste_double_action_add_v1, d_action, destination_dw_offset, hw_field); + MLX5_SET(ste_double_action_add_v1, d_action, destination_left_shifter, shifter); + MLX5_SET(ste_double_action_add_v1, d_action, destination_length, length); + MLX5_SET(ste_double_action_add_v1, d_action, add_value, data); +} + +void dr_ste_v1_set_action_copy(u8 *d_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) +{ + dst_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + src_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_copy_v1, d_action, action_id, DR_STE_V1_ACTION_ID_COPY); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_dw_offset, dst_hw_field); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_left_shifter, dst_shifter); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_length, dst_len); + MLX5_SET(ste_double_action_copy_v1, d_action, source_dw_offset, src_hw_field); + MLX5_SET(ste_double_action_copy_v1, d_action, source_right_shifter, src_shifter); +} + +#define DR_STE_DECAP_L3_ACTION_NUM 8 +#define DR_STE_L2_HDR_MAX_SZ 20 + +int dr_ste_v1_set_action_decap_l3_list(void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num) +{ + u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {}; + void *data_ptr = padded_data; + u16 used_actions = 0; + u32 inline_data_sz; + u32 i; + + if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM) + return -EINVAL; + + inline_data_sz = + MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); + + /* Add an alignment padding */ + memcpy(padded_data + data_sz % inline_data_sz, data, data_sz); + + /* Remove L2L3 outer headers */ + MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; /* Remove and NOP are a single double action */ + + /* Point to the last dword of the header */ + data_ptr += (data_sz / inline_data_sz) * inline_data_sz; + + /* Add the new header using inline action 4Byte at a time, the header + * is added in reversed order to the beginning of the packet to avoid + * incorrect parsing by the HW. Since header is 14B or 18B an extra + * two bytes are padded and later removed. + */ + for (i = 0; i < data_sz / inline_data_sz + 1; i++) { + void *addr_inline; + + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects here offset to words (2 bytes) */ + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0); + + /* Copy bytes one by one to avoid endianness problem */ + addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1, + hw_action, inline_data); + memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; + } + + /* Remove first 2 extra bytes */ + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0); + /* The hardware expects here size in words (2 bytes) */ + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1); + used_actions++; + + *used_hw_action_num = used_actions; + + return 0; +} + +static void dr_ste_v1_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_15_0, mask, smac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_ONES(eth_l2_src_dst_v1, bit_mask, l3_type, mask, ip_version); + + if (mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + } else if (mask->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1); + mask->svlan_tag = 0; + } +} + +static int dr_ste_v1_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_15_0, spec, dmac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_15_0, spec, smac_15_0); + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_priority, spec, first_prio); + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + return 0; +} + +void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC_DST, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_dst_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0); + + return 0; +} + +void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_DES, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_dst_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0); + + return 0; +} + +void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_SRC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_src_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_address, spec, dst_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_address, spec, src_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, ecn, spec, ip_ecn); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple_v1, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_5_TUPLE, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag; +} + +static void dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_src_v1, bit_mask, l3_type, mask, ip_version); + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } + + if (inner) { + if (misc_mask->inner_second_cvlan_tag || + misc_mask->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1); + misc_mask->inner_second_cvlan_tag = 0; + misc_mask->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_vlan_id, misc_mask, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_cfi, misc_mask, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_priority, misc_mask, inner_second_prio); + } else { + if (misc_mask->outer_second_cvlan_tag || + misc_mask->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1); + misc_mask->outer_second_cvlan_tag = 0; + misc_mask->outer_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_vlan_id, misc_mask, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_cfi, misc_mask, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_priority, misc_mask, outer_second_prio); + } +} + +static int dr_ste_v1_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, + bool inner, u8 *tag) +{ + struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc_spec = &value->misc; + + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_src_v1, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_src_v1, tag, l3_ethertype, spec, ethertype); + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (inner) { + if (misc_spec->inner_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->inner_second_cvlan_tag = 0; + } else if (misc_spec->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, inner_second_prio); + } else { + if (misc_spec->outer_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->outer_second_cvlan_tag = 0; + } else if (misc_spec->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->outer_second_svlan_tag = 0; + } + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, outer_second_prio); + } + + return 0; +} + +static void dr_ste_v1_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_15_0, mask, smac_15_0); + + dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_v1_build_eth_l2_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_15_0, spec, smac_15_0); + + return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_tag; +} + +static void dr_ste_v1_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + + dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_v1_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_15_0, spec, dmac_15_0); + + return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_dst_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_dst_tag; +} + +static void dr_ste_v1_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_tnl_v1, bit_mask, l3_type, mask, ip_version); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl_v1, bit_mask, + l2_tunneling_network_id, (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } +} + +static int dr_ste_v1_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_15_0, spec, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, l3_ethertype, spec, ethertype); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l2_tunneling_network_id, + (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + return 0; +} + +void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL2_TNL; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_tnl_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, time_to_live, spec, ttl_hoplimit); + DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, ihl, spec, ipv4_ihl); + + return 0; +} + +void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_MISC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_misc_tag; +} + +static int dr_ste_v1_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l4_v1, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l4_v1, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l4_v1, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l4_v1, tag, ecn, spec, ip_ecn); + DR_STE_SET_TAG(eth_l4_v1, tag, ipv6_hop_limit, spec, ttl_hoplimit); + + if (sb->inner) + DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, inner_ipv6_flow_label); + else + DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, outer_ipv6_flow_label); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4_v1, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL4, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_ipv6_l3_l4_tag; +} + +static int dr_ste_v1_build_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + if (sb->inner) + DR_STE_SET_MPLS(mpls_v1, misc2, inner, tag); + else + DR_STE_SET_MPLS(mpls_v1, misc2, outer, tag); + + return 0; +} + +void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(MPLS, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_mpls_tag; +} + +static int dr_ste_v1_build_tnl_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(gre_v1, tag, gre_protocol, misc, gre_protocol); + DR_STE_SET_TAG(gre_v1, tag, gre_k_present, misc, gre_k_present); + DR_STE_SET_TAG(gre_v1, tag, gre_key_h, misc, gre_key_h); + DR_STE_SET_TAG(gre_v1, tag, gre_key_l, misc, gre_key_l); + + DR_STE_SET_TAG(gre_v1, tag, gre_c_present, misc, gre_c_present); + DR_STE_SET_TAG(gre_v1, tag, gre_s_present, misc, gre_s_present); + + return 0; +} + +void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gre_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_GRE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gre_tag; +} + +static int dr_ste_v1_build_tnl_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc2)) { + DR_STE_SET_TAG(mpls_v1, tag, mpls0_label, + misc2, outer_first_mpls_over_gre_label); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp, + misc2, outer_first_mpls_over_gre_exp); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos, + misc2, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl, + misc2, outer_first_mpls_over_gre_ttl); + } else { + DR_STE_SET_TAG(mpls_v1, tag, mpls0_label, + misc2, outer_first_mpls_over_udp_label); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp, + misc2, outer_first_mpls_over_udp_exp); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos, + misc2, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl, + misc2, outer_first_mpls_over_udp_ttl); + } + + return 0; +} + +void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_MPLS_I; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_tag; +} + +static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_udp_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_udp; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_udp_tag; +} + +static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_gre_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_gre; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_gre_tag; +} + +static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + bool is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc3); + u32 *icmp_header_data; + u8 *icmp_type; + u8 *icmp_code; + + if (is_ipv4) { + icmp_header_data = &misc3->icmpv4_header_data; + icmp_type = &misc3->icmpv4_type; + icmp_code = &misc3->icmpv4_code; + } else { + icmp_header_data = &misc3->icmpv6_header_data; + icmp_type = &misc3->icmpv6_type; + icmp_code = &misc3->icmpv6_code; + } + + MLX5_SET(ste_icmp_v1, tag, icmp_header_data, *icmp_header_data); + MLX5_SET(ste_icmp_v1, tag, icmp_type, *icmp_type); + MLX5_SET(ste_icmp_v1, tag, icmp_code, *icmp_code); + + *icmp_header_data = 0; + *icmp_type = 0; + *icmp_code = 0; + + return 0; +} + +void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_icmp_tag; +} + +static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, + misc2, metadata_reg_a); + + return 0; +} + +void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_general_purpose_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_GENERAL_PURPOSE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_general_purpose_tag; +} + +static int dr_ste_v1_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + if (sb->inner) { + DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, inner_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, inner_tcp_ack_num); + } else { + DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, outer_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, outer_tcp_ack_num); + } + + return 0; +} + +void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l4_misc_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_flags, misc3, + outer_vxlan_gpe_flags); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_next_protocol, misc3, + outer_vxlan_gpe_next_protocol); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_vni, misc3, + outer_vxlan_gpe_vni); + + return 0; +} + +void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_protocol_type, misc, geneve_protocol_type); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_oam, misc, geneve_oam); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_opt_len, misc, geneve_opt_len); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_vni, misc, geneve_vni); + + return 0; +} + +void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tag; +} + +static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc5 *misc5 = &value->misc5; + + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0); + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1); + + return 0; +} + +void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + dr_ste_v1_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_header_0_1_tag; +} + +static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); + DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); + DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2); + DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3); + + return 0; +} + +void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_register_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_register_0_tag; +} + +static int dr_ste_v1_build_register_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); + DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); + DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6); + DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7); + + return 0; +} + +void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_register_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_register_1_tag; +} + +static void dr_ste_v1_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port); + DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn); + misc_mask->source_eswitch_owner_vhca_id = 0; +} + +static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + int id = misc->source_eswitch_owner_vhca_id; + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *dmn = sb->dmn; + struct mlx5dr_domain *vport_dmn; + u8 *bit_mask = sb->bit_mask; + struct mlx5dr_domain *peer; + + DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn); + + if (sb->vhca_id_valid) { + peer = xa_load(&dmn->peer_dmn_xa, id); + /* Find port GVMI based on the eswitch_owner_vhca_id */ + if (id == dmn->info.caps.gvmi) + vport_dmn = dmn; + else if (peer && (id == peer->info.caps.gvmi)) + vport_dmn = peer; + else + return -EINVAL; + + misc->source_eswitch_owner_vhca_id = 0; + } else { + vport_dmn = dmn; + } + + if (!MLX5_GET(ste_src_gvmi_qp_v1, bit_mask, source_gvmi)) + return 0; + + vport_cap = mlx5dr_domain_get_vport_cap(vport_dmn, misc->source_port); + if (!vport_cap) { + mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n", + misc->source_port); + return -EINVAL; + } + + if (vport_cap->vport_gvmi) + MLX5_SET(ste_src_gvmi_qp_v1, tag, source_gvmi, vport_cap->vport_gvmi); + + misc->source_port = 0; + return 0; +} + +void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_SRC_QP_GVMI; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_src_gvmi_qpn_tag; +} + +static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id, + u32 *misc4_field_value, + bool *parser_is_used, + u8 *tag) +{ + u32 id = *misc4_field_id; + u8 *parser_ptr; + + if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id]) + return; + + parser_is_used[id] = true; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, id); + + *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value); + *misc4_field_id = 0; + *misc4_field_value = 0; +} + +static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4; + bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {}; + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_0, + &misc_4_mask->prog_sample_field_value_0, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_1, + &misc_4_mask->prog_sample_field_value_1, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_2, + &misc_4_mask->prog_sample_field_value_2, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_3, + &misc_4_mask->prog_sample_field_value_3, + parser_is_used, tag); + + return 0; +} + +void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; +} + +void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; + dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + + MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3, + misc3->geneve_tlv_option_0_data); + misc3->geneve_tlv_option_0_data = 0; + + return 0; +} + +void +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + struct mlx5dr_match_misc *misc = &value->misc; + + if (misc->geneve_tlv_option_0_exist) { + MLX5_SET(ste_flex_parser_ok, tag, flex_parsers_ok, 1 << parser_id); + misc->geneve_tlv_option_0_exist = 0; + } + + return 0; +} + +void +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_OK; + dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag; +} + +static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_flags, misc3, gtpu_msg_flags); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_type, misc3, gtpu_msg_type); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_teid, misc3, gtpu_teid); + + return 0; +} + +void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_gtpu_tag; +} + +static int +dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +void +dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag; +} + +static int +dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +void +dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag; +} + +int dr_ste_v1_alloc_modify_hdr_ptrn_arg(struct mlx5dr_action *action) +{ + struct mlx5dr_ptrn_mgr *ptrn_mgr; + int ret; + + ptrn_mgr = action->rewrite->dmn->ptrn_mgr; + if (!ptrn_mgr) + return -EOPNOTSUPP; + + action->rewrite->arg = mlx5dr_arg_get_obj(action->rewrite->dmn->arg_mgr, + action->rewrite->num_of_actions, + action->rewrite->data); + if (!action->rewrite->arg) { + mlx5dr_err(action->rewrite->dmn, "Failed allocating args for modify header\n"); + return -EAGAIN; + } + + action->rewrite->ptrn = + mlx5dr_ptrn_cache_get_pattern(ptrn_mgr, + action->rewrite->num_of_actions, + action->rewrite->data); + if (!action->rewrite->ptrn) { + mlx5dr_err(action->rewrite->dmn, "Failed to get pattern\n"); + ret = -EAGAIN; + goto put_arg; + } + + return 0; + +put_arg: + mlx5dr_arg_put_obj(action->rewrite->dmn->arg_mgr, + action->rewrite->arg); + return ret; +} + +void dr_ste_v1_free_modify_hdr_ptrn_arg(struct mlx5dr_action *action) +{ + mlx5dr_ptrn_cache_put_pattern(action->rewrite->dmn->ptrn_mgr, + action->rewrite->ptrn); + mlx5dr_arg_put_obj(action->rewrite->dmn->arg_mgr, + action->rewrite->arg); +} + +static struct mlx5dr_ste_ctx ste_ctx_v1 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = &dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .is_miss_addr_set = &dr_ste_v1_is_miss_addr_set, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + .get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | + DR_STE_CTX_ACTION_CAP_RX_PUSH | + DR_STE_CTX_ACTION_CAP_RX_ENCAP | + DR_STE_CTX_ACTION_CAP_POP_MDFY, + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr), + .modify_field_arr = dr_ste_v1_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, + .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, + + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void) +{ + return &ste_ctx_v1; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h new file mode 100644 index 0000000000..e2fc698670 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef _DR_STE_V1_ +#define _DR_STE_V1_ + +#include "dr_types.h" +#include "dr_ste.h" + +bool dr_ste_v1_is_miss_addr_set(u8 *hw_ste_p); +void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr); +u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p); +void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask); +u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p); +void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type); +u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p); +void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); +void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi); +void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size); +void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); +void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set, + u32 actions_caps, u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); +void dr_ste_v1_set_action_set(u8 *d_action, u8 hw_field, u8 shifter, + u8 length, u32 data); +void dr_ste_v1_set_action_add(u8 *d_action, u8 hw_field, u8 shifter, + u8 length, u32 data); +void dr_ste_v1_set_action_copy(u8 *d_action, u8 dst_hw_field, u8 dst_shifter, + u8 dst_len, u8 src_hw_field, u8 src_shifter); +int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, + u32 hw_action_sz, u16 *used_hw_action_num); +int dr_ste_v1_alloc_modify_hdr_ptrn_arg(struct mlx5dr_action *action); +void dr_ste_v1_free_modify_hdr_ptrn_arg(struct mlx5dr_action *action); +void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); +void dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask); + +#endif /* _DR_STE_V1_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c new file mode 100644 index 0000000000..808b013cf4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "dr_ste_v1.h" + +enum { + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, +}; + +static struct mlx5dr_ste_ctx ste_ctx_v2 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = + &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = &dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .is_miss_addr_set = &dr_ste_v1_is_miss_addr_set, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + .get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, + + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | + DR_STE_CTX_ACTION_CAP_RX_PUSH | + DR_STE_CTX_ACTION_CAP_RX_ENCAP, + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr), + .modify_field_arr = dr_ste_v2_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, + .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, + + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void) +{ + return &ste_ctx_v2; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c new file mode 100644 index 0000000000..69294a66fd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "dr_types.h" + +static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_action *action) +{ + struct mlx5dr_matcher_rx_tx *last_nic_matcher = NULL; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *last_htbl; + struct mlx5dr_icm_chunk *chunk; + int ret; + + if (!list_empty(&nic_tbl->nic_matcher_list)) + last_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list, + struct mlx5dr_matcher_rx_tx, + list_node); + + if (last_nic_matcher) + last_htbl = last_nic_matcher->e_anchor; + else + last_htbl = nic_tbl->s_anchor; + + if (action) { + chunk = nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ? + action->dest_tbl->tbl->rx.s_anchor->chunk : + action->dest_tbl->tbl->tx.s_anchor->chunk; + nic_tbl->default_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); + } else { + nic_tbl->default_icm_addr = nic_tbl->nic_dmn->default_icm_addr; + } + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_tbl->nic_dmn, + last_htbl, &info, true); + if (ret) + mlx5dr_dbg(dmn, "Failed to set NIC RX/TX miss action, ret %d\n", ret); + + return ret; +} + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) +{ + int ret = -EOPNOTSUPP; + + if (action && action->action_type != DR_ACTION_TYP_FT) + return -EOPNOTSUPP; + + mlx5dr_domain_lock(tbl->dmn); + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->rx, action); + if (ret) + goto out; + } + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { + ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->tx, action); + if (ret) + goto out; + } + + if (ret) + goto out; + + /* Release old action */ + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + /* Set new miss action */ + tbl->miss_action = action; + if (tbl->miss_action) + refcount_inc(&action->refcount); + +out: + mlx5dr_domain_unlock(tbl->dmn); + return ret; +} + +static void dr_table_uninit_nic(struct mlx5dr_table_rx_tx *nic_tbl) +{ + mlx5dr_htbl_put(nic_tbl->s_anchor); +} + +static void dr_table_uninit_fdb(struct mlx5dr_table *tbl) +{ + dr_table_uninit_nic(&tbl->rx); + dr_table_uninit_nic(&tbl->tx); +} + +static void dr_table_uninit(struct mlx5dr_table *tbl) +{ + mlx5dr_domain_lock(tbl->dmn); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + dr_table_uninit_nic(&tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + dr_table_uninit_nic(&tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + dr_table_uninit_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mlx5dr_domain_unlock(tbl->dmn); +} + +static int dr_table_init_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl) +{ + struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; + struct mlx5dr_htbl_connect_info info; + int ret; + + INIT_LIST_HEAD(&nic_tbl->nic_matcher_list); + + nic_tbl->default_icm_addr = nic_dmn->default_icm_addr; + + nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, + DR_CHUNK_SIZE_1, + MLX5DR_STE_LU_TYPE_DONT_CARE, + 0); + if (!nic_tbl->s_anchor) { + mlx5dr_err(dmn, "Failed allocating htbl\n"); + return -ENOMEM; + } + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_dmn->default_icm_addr; + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, + nic_tbl->s_anchor, + &info, true); + if (ret) { + mlx5dr_err(dmn, "Failed int and send htbl\n"); + goto free_s_anchor; + } + + mlx5dr_htbl_get(nic_tbl->s_anchor); + + return 0; + +free_s_anchor: + mlx5dr_ste_htbl_free(nic_tbl->s_anchor); + return ret; +} + +static int dr_table_init_fdb(struct mlx5dr_table *tbl) +{ + int ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + if (ret) + return ret; + + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + if (ret) + goto destroy_rx; + + return 0; + +destroy_rx: + dr_table_uninit_nic(&tbl->rx); + return ret; +} + +static int dr_table_init(struct mlx5dr_table *tbl) +{ + int ret = 0; + + INIT_LIST_HEAD(&tbl->matcher_list); + + mlx5dr_domain_lock(tbl->dmn); + + switch (tbl->dmn->type) { + case MLX5DR_DOMAIN_TYPE_NIC_RX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_RX; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + ret = dr_table_init_nic(tbl->dmn, &tbl->rx); + break; + case MLX5DR_DOMAIN_TYPE_NIC_TX: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_NIC_TX; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_nic(tbl->dmn, &tbl->tx); + break; + case MLX5DR_DOMAIN_TYPE_FDB: + tbl->table_type = MLX5_FLOW_TABLE_TYPE_FDB; + tbl->rx.nic_dmn = &tbl->dmn->info.rx; + tbl->tx.nic_dmn = &tbl->dmn->info.tx; + ret = dr_table_init_fdb(tbl); + break; + default: + WARN_ON(true); + break; + } + + mlx5dr_domain_unlock(tbl->dmn); + + return ret; +} + +static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl) +{ + return mlx5dr_cmd_destroy_flow_table(tbl->dmn->mdev, + tbl->table_id, + tbl->table_type); +} + +static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl, u16 uid) +{ + bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + u64 icm_addr_rx = 0; + u64 icm_addr_tx = 0; + int ret; + + if (tbl->rx.s_anchor) + icm_addr_rx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->rx.s_anchor->chunk); + + if (tbl->tx.s_anchor) + icm_addr_tx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->tx.s_anchor->chunk); + + ft_attr.table_type = tbl->table_type; + ft_attr.icm_addr_rx = icm_addr_rx; + ft_attr.icm_addr_tx = icm_addr_tx; + ft_attr.level = tbl->dmn->info.caps.max_ft_level - 1; + ft_attr.sw_owner = true; + ft_attr.decap_en = en_decap; + ft_attr.reformat_en = en_encap; + ft_attr.uid = uid; + + ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr, + NULL, &tbl->table_id); + + return ret; +} + +struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, + u32 flags, u16 uid) +{ + struct mlx5dr_table *tbl; + int ret; + + refcount_inc(&dmn->refcount); + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (!tbl) + goto dec_ref; + + tbl->dmn = dmn; + tbl->level = level; + tbl->flags = flags; + refcount_set(&tbl->refcount, 1); + + ret = dr_table_init(tbl); + if (ret) + goto free_tbl; + + ret = dr_table_create_sw_owned_tbl(tbl, uid); + if (ret) + goto uninit_tbl; + + INIT_LIST_HEAD(&tbl->dbg_node); + mlx5dr_dbg_tbl_add(tbl); + return tbl; + +uninit_tbl: + dr_table_uninit(tbl); +free_tbl: + kfree(tbl); +dec_ref: + refcount_dec(&dmn->refcount); + return NULL; +} + +int mlx5dr_table_destroy(struct mlx5dr_table *tbl) +{ + int ret; + + if (WARN_ON_ONCE(refcount_read(&tbl->refcount) > 1)) + return -EBUSY; + + mlx5dr_dbg_tbl_del(tbl); + ret = dr_table_destroy_sw_owned_tbl(tbl); + if (ret) + mlx5dr_err(tbl->dmn, "Failed to destroy sw owned table\n"); + + dr_table_uninit(tbl); + + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); + + refcount_dec(&tbl->dmn->refcount); + kfree(tbl); + + return ret; +} + +u32 mlx5dr_table_get_id(struct mlx5dr_table *tbl) +{ + return tbl->table_id; +} + +struct mlx5dr_table *mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft) +{ + return ft->fs_dr_table.dr_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h new file mode 100644 index 0000000000..6c59de3e28 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -0,0 +1,1597 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _DR_TYPES_ +#define _DR_TYPES_ + +#include +#include +#include "fs_core.h" +#include "wq.h" +#include "lib/mlx5.h" +#include "mlx5_ifc_dr.h" +#include "mlx5dr.h" +#include "dr_dbg.h" + +#define DR_RULE_MAX_STES 18 +#define DR_ACTION_MAX_STES 5 +#define DR_STE_SVLAN 0x1 +#define DR_STE_CVLAN 0x2 +#define DR_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4) +#define DR_NUM_OF_FLEX_PARSERS 8 +#define DR_STE_MAX_FLEX_0_ID 3 +#define DR_STE_MAX_FLEX_1_ID 7 +#define DR_ACTION_CACHE_LINE_SIZE 64 + +#define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg) +#define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg) +#define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg) + +struct mlx5dr_ptrn_mgr; +struct mlx5dr_arg_mgr; +struct mlx5dr_arg_obj; + +static inline bool dr_is_flex_parser_0_id(u8 parser_id) +{ + return parser_id <= DR_STE_MAX_FLEX_0_ID; +} + +static inline bool dr_is_flex_parser_1_id(u8 parser_id) +{ + return parser_id > DR_STE_MAX_FLEX_0_ID; +} + +enum mlx5dr_icm_chunk_size { + DR_CHUNK_SIZE_1, + DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */ + DR_CHUNK_SIZE_2, + DR_CHUNK_SIZE_4, + DR_CHUNK_SIZE_8, + DR_CHUNK_SIZE_16, + DR_CHUNK_SIZE_32, + DR_CHUNK_SIZE_64, + DR_CHUNK_SIZE_128, + DR_CHUNK_SIZE_256, + DR_CHUNK_SIZE_512, + DR_CHUNK_SIZE_1K, + DR_CHUNK_SIZE_2K, + DR_CHUNK_SIZE_4K, + DR_CHUNK_SIZE_8K, + DR_CHUNK_SIZE_16K, + DR_CHUNK_SIZE_32K, + DR_CHUNK_SIZE_64K, + DR_CHUNK_SIZE_128K, + DR_CHUNK_SIZE_256K, + DR_CHUNK_SIZE_512K, + DR_CHUNK_SIZE_1024K, + DR_CHUNK_SIZE_2048K, + DR_CHUNK_SIZE_MAX, +}; + +enum mlx5dr_icm_type { + DR_ICM_TYPE_STE, + DR_ICM_TYPE_MODIFY_ACTION, + DR_ICM_TYPE_MODIFY_HDR_PTRN, + DR_ICM_TYPE_MAX, +}; + +static inline enum mlx5dr_icm_chunk_size +mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk) +{ + chunk += 2; + if (chunk < DR_CHUNK_SIZE_MAX) + return chunk; + + return DR_CHUNK_SIZE_MAX; +} + +enum { + DR_STE_SIZE = 64, + DR_STE_SIZE_CTRL = 32, + DR_STE_SIZE_MATCH_TAG = 32, + DR_STE_SIZE_TAG = 16, + DR_STE_SIZE_MASK = 16, + DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK, +}; + +enum mlx5dr_ste_ctx_action_cap { + DR_STE_CTX_ACTION_CAP_NONE = 0, + DR_STE_CTX_ACTION_CAP_TX_POP = 1 << 0, + DR_STE_CTX_ACTION_CAP_RX_PUSH = 1 << 1, + DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2, + DR_STE_CTX_ACTION_CAP_POP_MDFY = 1 << 3, +}; + +enum { + DR_MODIFY_ACTION_SIZE = 8, +}; + +enum mlx5dr_matcher_criteria { + DR_MATCHER_CRITERIA_EMPTY = 0, + DR_MATCHER_CRITERIA_OUTER = 1 << 0, + DR_MATCHER_CRITERIA_MISC = 1 << 1, + DR_MATCHER_CRITERIA_INNER = 1 << 2, + DR_MATCHER_CRITERIA_MISC2 = 1 << 3, + DR_MATCHER_CRITERIA_MISC3 = 1 << 4, + DR_MATCHER_CRITERIA_MISC4 = 1 << 5, + DR_MATCHER_CRITERIA_MISC5 = 1 << 6, + DR_MATCHER_CRITERIA_MAX = 1 << 7, +}; + +enum mlx5dr_action_type { + DR_ACTION_TYP_TNL_L2_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L2, + DR_ACTION_TYP_TNL_L3_TO_L2, + DR_ACTION_TYP_L2_TO_TNL_L3, + DR_ACTION_TYP_DROP, + DR_ACTION_TYP_QP, + DR_ACTION_TYP_FT, + DR_ACTION_TYP_CTR, + DR_ACTION_TYP_TAG, + DR_ACTION_TYP_MODIFY_HDR, + DR_ACTION_TYP_VPORT, + DR_ACTION_TYP_POP_VLAN, + DR_ACTION_TYP_PUSH_VLAN, + DR_ACTION_TYP_INSERT_HDR, + DR_ACTION_TYP_REMOVE_HDR, + DR_ACTION_TYP_SAMPLER, + DR_ACTION_TYP_ASO_FLOW_METER, + DR_ACTION_TYP_RANGE, + DR_ACTION_TYP_MAX, +}; + +enum mlx5dr_ipv { + DR_RULE_IPV4, + DR_RULE_IPV6, + DR_RULE_IPV_MAX, +}; + +struct mlx5dr_icm_pool; +struct mlx5dr_icm_chunk; +struct mlx5dr_icm_buddy_mem; +struct mlx5dr_ste_htbl; +struct mlx5dr_match_param; +struct mlx5dr_cmd_caps; +struct mlx5dr_rule_rx_tx; +struct mlx5dr_matcher_rx_tx; +struct mlx5dr_ste_ctx; +struct mlx5dr_send_info_pool; +struct mlx5dr_icm_hot_chunk; + +struct mlx5dr_ste { + /* refcount: indicates the num of rules that using this ste */ + u32 refcount; + + /* this ste is part of a rule, located in ste's chain */ + u8 ste_chain_location; + + /* attached to the miss_list head at each htbl entry */ + struct list_head miss_list_node; + + /* this ste is member of htbl */ + struct mlx5dr_ste_htbl *htbl; + + struct mlx5dr_ste_htbl *next_htbl; + + /* The rule this STE belongs to */ + struct mlx5dr_rule_rx_tx *rule_rx_tx; +}; + +struct mlx5dr_ste_htbl_ctrl { + /* total number of valid entries belonging to this hash table. This + * includes the non collision and collision entries + */ + unsigned int num_of_valid_entries; + + /* total number of collisions entries attached to this table */ + unsigned int num_of_collisions; +}; + +struct mlx5dr_ste_htbl { + u16 lu_type; + u16 byte_mask; + u32 refcount; + struct mlx5dr_icm_chunk *chunk; + struct mlx5dr_ste *pointing_ste; + struct mlx5dr_ste_htbl_ctrl ctrl; +}; + +struct mlx5dr_ste_send_info { + struct mlx5dr_ste *ste; + struct list_head send_list; + u16 size; + u16 offset; + u8 data_cont[DR_STE_SIZE]; + u8 *data; +}; + +void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, + u16 offset, u8 *data, + struct mlx5dr_ste_send_info *ste_info, + struct list_head *send_list, + bool copy_data); + +struct mlx5dr_ste_build { + u8 inner:1; + u8 rx:1; + u8 vhca_id_valid:1; + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_caps *caps; + u16 lu_type; + u16 byte_mask; + u8 bit_mask[DR_STE_SIZE_MASK]; + int (*ste_build_tag_func)(struct mlx5dr_match_param *spec, + struct mlx5dr_ste_build *sb, + u8 *tag); +}; + +struct mlx5dr_ste_htbl * +mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + u16 lu_type, u16 byte_mask); + +int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl); + +static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl) +{ + htbl->refcount--; + if (!htbl->refcount) + mlx5dr_ste_htbl_free(htbl); +} + +static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl) +{ + htbl->refcount++; +} + +/* STE utils */ +u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl); +bool mlx5dr_ste_is_miss_addr_set(struct mlx5dr_ste_ctx *ste_ctx, u8 *hw_ste_p); +void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, u64 miss_addr); +void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, u64 icm_addr, u32 ht_size); +void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste, + struct mlx5dr_ste_htbl *next_htbl); +void mlx5dr_ste_set_bit_mask(u8 *hw_ste_p, u8 *bit_mask); +bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 ste_location); +u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste); +u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste); +struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste); + +#define MLX5DR_MAX_VLANS 2 +#define MLX5DR_INVALID_PATTERN_INDEX 0xffffffff + +struct mlx5dr_ste_actions_attr { + u32 modify_index; + u32 modify_pat_idx; + u16 modify_actions; + u8 *single_modify_action; + u32 decap_index; + u32 decap_pat_idx; + u16 decap_actions; + u8 decap_with_vlan:1; + u64 final_icm_addr; + u32 flow_tag; + u32 ctr_id; + u16 gvmi; + u16 hit_gvmi; + struct { + u32 id; + u32 size; + u8 param_0; + u8 param_1; + } reformat; + struct { + int count; + u32 headers[MLX5DR_MAX_VLANS]; + } vlans; + + struct { + u32 obj_id; + u32 offset; + u8 dest_reg_id; + u8 init_color; + } aso_flow_meter; + + struct { + u64 miss_icm_addr; + u32 definer_id; + u32 min; + u32 max; + } range; +}; + +void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes); +void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes); + +void mlx5dr_ste_set_action_set(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data); +void mlx5dr_ste_set_action_add(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data); +void mlx5dr_ste_set_action_copy(struct mlx5dr_ste_ctx *ste_ctx, + __be64 *hw_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter); +int mlx5dr_ste_set_action_decap_l3_list(struct mlx5dr_ste_ctx *ste_ctx, + void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num); +int mlx5dr_ste_alloc_modify_hdr(struct mlx5dr_action *action); +void mlx5dr_ste_free_modify_hdr(struct mlx5dr_action *action); + +const struct mlx5dr_ste_action_modify_field * +mlx5dr_ste_conv_modify_hdr_sw_field(struct mlx5dr_ste_ctx *ste_ctx, u16 sw_field); + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version); +void mlx5dr_ste_free(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher); +static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste, + struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher) +{ + ste->refcount--; + if (!ste->refcount) + mlx5dr_ste_free(ste, matcher, nic_matcher); +} + +/* initial as 0, increased only when ste appears in a new rule */ +static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste) +{ + ste->refcount++; +} + +static inline bool mlx5dr_ste_is_not_used(struct mlx5dr_ste *ste) +{ + return !ste->refcount; +} + +bool mlx5dr_ste_equal_tag(void *src, void *dst); +int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + u8 *cur_hw_ste, + enum mlx5dr_icm_chunk_size log_table_size); + +/* STE build functions */ +int mlx5dr_ste_build_pre_check(struct mlx5dr_domain *dmn, + u8 match_criteria, + struct mlx5dr_match_param *mask, + struct mlx5dr_match_param *value); +int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_match_param *value, + u8 *ste_arr); +void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *builder, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv4_misc(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l3_ipv6_src(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_src(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_register_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); + +/* Actions utils */ +int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_action *actions[], + u32 num_actions, + u8 *ste_arr, + u32 *new_hw_ste_arr_sz); + +struct mlx5dr_match_spec { + u32 smac_47_16; /* Source MAC address of incoming packet */ + /* Incoming packet Ethertype - this is the Ethertype + * following the last VLAN tag of the packet + */ + u32 smac_15_0:16; /* Source MAC address of incoming packet */ + u32 ethertype:16; + + u32 dmac_47_16; /* Destination MAC address of incoming packet */ + + u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ + /* Priority of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_prio:3; + /* CFI bit of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_cfi:1; + /* VLAN ID of first VLAN tag in the incoming packet. + * Valid only when cvlan_tag==1 or svlan_tag==1 + */ + u32 first_vid:12; + + u32 ip_protocol:8; /* IP protocol */ + /* Differentiated Services Code Point derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_dscp:6; + /* Explicit Congestion Notification derived from + * Traffic Class/TOS field of IPv6/v4 + */ + u32 ip_ecn:2; + /* The first vlan in the packet is c-vlan (0x8100). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 cvlan_tag:1; + /* The first vlan in the packet is s-vlan (0x8a88). + * cvlan_tag and svlan_tag cannot be set together + */ + u32 svlan_tag:1; + u32 frag:1; /* Packet is an IP fragment */ + u32 ip_version:4; /* IP version */ + /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; + * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS + */ + u32 tcp_flags:9; + + /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 tcp_sport:16; + /* TCP destination port. + * tcp and udp sport/dport are mutually exclusive + */ + u32 tcp_dport:16; + + u32 reserved_auto1:16; + u32 ipv4_ihl:4; + u32 reserved_auto2:4; + u32 ttl_hoplimit:8; + + /* UDP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_sport:16; + /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_dport:16; + + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_127_96; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_95_64; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_63_32; + /* IPv6 source address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 src_ip_31_0; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_127_96; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_95_64; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_63_32; + /* IPv6 destination address of incoming packets + * For IPv4 address use bits 31:0 (rest of the bits are reserved) + * This field should be qualified by an appropriate ethertype + */ + u32 dst_ip_31_0; +}; + +struct mlx5dr_match_misc { + /* used with GRE, checksum exist when gre_c_present == 1 */ + u32 gre_c_present:1; + u32 reserved_auto1:1; + /* used with GRE, key exist when gre_k_present == 1 */ + u32 gre_k_present:1; + /* used with GRE, sequence number exist when gre_s_present == 1 */ + u32 gre_s_present:1; + u32 source_vhca_port:4; + u32 source_sqn:24; /* Source SQN */ + + u32 source_eswitch_owner_vhca_id:16; + /* Source port.;0xffff determines wire port */ + u32 source_port:16; + + /* Priority of second VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_prio:3; + /* CFI bit of first VLAN tag in the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_cfi:1; + /* VLAN ID of first VLAN tag the outer header of the incoming packet. + * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 + */ + u32 outer_second_vid:12; + /* Priority of second VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_prio:3; + /* CFI bit of first VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_cfi:1; + /* VLAN ID of first VLAN tag the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_vid:12; + + u32 outer_second_cvlan_tag:1; + u32 inner_second_cvlan_tag:1; + /* The second vlan in the outer header of the packet is c-vlan (0x8100). + * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 outer_second_svlan_tag:1; + /* The second vlan in the inner header of the packet is c-vlan (0x8100). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 inner_second_svlan_tag:1; + /* The second vlan in the outer header of the packet is s-vlan (0x8a88). + * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together + */ + u32 reserved_auto2:12; + /* The second vlan in the inner header of the packet is s-vlan (0x8a88). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 gre_protocol:16; /* GRE Protocol (outer) */ + + u32 gre_key_h:24; /* GRE Key[31:8] (outer) */ + u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + + u32 vxlan_vni:24; /* VXLAN VNI (outer) */ + u32 reserved_auto3:8; + + u32 geneve_vni:24; /* GENEVE VNI field (outer) */ + u32 reserved_auto4:6; + u32 geneve_tlv_option_0_exist:1; + u32 geneve_oam:1; /* GENEVE OAM field (outer) */ + + u32 reserved_auto5:12; + u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */ + + u32 reserved_auto6:12; + u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */ + + u32 reserved_auto7:10; + u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */ + u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + + u32 reserved_auto8:8; + u32 bth_dst_qp:24; /* Destination QP in BTH header */ + + u32 reserved_auto9; + u32 outer_esp_spi; + u32 reserved_auto10[3]; +}; + +struct mlx5dr_match_misc2 { + u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */ + u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ + u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ + u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ + + u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */ + u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ + u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ + + u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ + + u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ + + u32 metadata_reg_c_7; /* metadata_reg_c_7 */ + u32 metadata_reg_c_6; /* metadata_reg_c_6 */ + u32 metadata_reg_c_5; /* metadata_reg_c_5 */ + u32 metadata_reg_c_4; /* metadata_reg_c_4 */ + u32 metadata_reg_c_3; /* metadata_reg_c_3 */ + u32 metadata_reg_c_2; /* metadata_reg_c_2 */ + u32 metadata_reg_c_1; /* metadata_reg_c_1 */ + u32 metadata_reg_c_0; /* metadata_reg_c_0 */ + u32 metadata_reg_a; /* metadata_reg_a */ + u32 reserved_auto1[3]; +}; + +struct mlx5dr_match_misc3 { + u32 inner_tcp_seq_num; + u32 outer_tcp_seq_num; + u32 inner_tcp_ack_num; + u32 outer_tcp_ack_num; + + u32 reserved_auto1:8; + u32 outer_vxlan_gpe_vni:24; + + u32 outer_vxlan_gpe_next_protocol:8; + u32 outer_vxlan_gpe_flags:8; + u32 reserved_auto2:16; + + u32 icmpv4_header_data; + u32 icmpv6_header_data; + + u8 icmpv4_type; + u8 icmpv4_code; + u8 icmpv6_type; + u8 icmpv6_code; + + u32 geneve_tlv_option_0_data; + + u32 gtpu_teid; + + u8 gtpu_msg_type; + u8 gtpu_msg_flags; + u32 reserved_auto3:16; + + u32 gtpu_dw_2; + u32 gtpu_first_ext_dw_0; + u32 gtpu_dw_0; + u32 reserved_auto4; +}; + +struct mlx5dr_match_misc4 { + u32 prog_sample_field_value_0; + u32 prog_sample_field_id_0; + u32 prog_sample_field_value_1; + u32 prog_sample_field_id_1; + u32 prog_sample_field_value_2; + u32 prog_sample_field_id_2; + u32 prog_sample_field_value_3; + u32 prog_sample_field_id_3; + u32 reserved_auto1[8]; +}; + +struct mlx5dr_match_misc5 { + u32 macsec_tag_0; + u32 macsec_tag_1; + u32 macsec_tag_2; + u32 macsec_tag_3; + u32 tunnel_header_0; + u32 tunnel_header_1; + u32 tunnel_header_2; + u32 tunnel_header_3; +}; + +struct mlx5dr_match_param { + struct mlx5dr_match_spec outer; + struct mlx5dr_match_misc misc; + struct mlx5dr_match_spec inner; + struct mlx5dr_match_misc2 misc2; + struct mlx5dr_match_misc3 misc3; + struct mlx5dr_match_misc4 misc4; + struct mlx5dr_match_misc5 misc5; +}; + +#define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ + (_misc3)->icmpv4_code || \ + (_misc3)->icmpv4_header_data) + +#define DR_MASK_IS_SRC_IP_SET(_spec) ((_spec)->src_ip_127_96 || \ + (_spec)->src_ip_95_64 || \ + (_spec)->src_ip_63_32 || \ + (_spec)->src_ip_31_0) + +#define DR_MASK_IS_DST_IP_SET(_spec) ((_spec)->dst_ip_127_96 || \ + (_spec)->dst_ip_95_64 || \ + (_spec)->dst_ip_63_32 || \ + (_spec)->dst_ip_31_0) + +struct mlx5dr_esw_caps { + u64 drop_icm_address_rx; + u64 drop_icm_address_tx; + u64 uplink_icm_address_rx; + u64 uplink_icm_address_tx; + u8 sw_owner:1; + u8 sw_owner_v2:1; +}; + +struct mlx5dr_cmd_vport_cap { + u16 vport_gvmi; + u16 vhca_gvmi; + u16 num; + u64 icm_address_rx; + u64 icm_address_tx; +}; + +struct mlx5dr_roce_cap { + u8 roce_en:1; + u8 fl_rc_qp_when_roce_disabled:1; + u8 fl_rc_qp_when_roce_enabled:1; +}; + +struct mlx5dr_vports { + struct mlx5dr_cmd_vport_cap esw_manager_caps; + struct mlx5dr_cmd_vport_cap uplink_caps; + struct xarray vports_caps_xa; +}; + +struct mlx5dr_cmd_caps { + u16 gvmi; + u64 nic_rx_drop_address; + u64 nic_tx_drop_address; + u64 nic_tx_allow_address; + u64 esw_rx_drop_address; + u64 esw_tx_drop_address; + u32 log_icm_size; + u64 hdr_modify_icm_addr; + u32 log_modify_pattern_icm_size; + u64 hdr_modify_pattern_icm_addr; + u32 flex_protocols; + u8 flex_parser_id_icmp_dw0; + u8 flex_parser_id_icmp_dw1; + u8 flex_parser_id_icmpv6_dw0; + u8 flex_parser_id_icmpv6_dw1; + u8 flex_parser_id_geneve_tlv_option_0; + u8 flex_parser_id_mpls_over_gre; + u8 flex_parser_id_mpls_over_udp; + u8 flex_parser_id_gtpu_dw_0; + u8 flex_parser_id_gtpu_teid; + u8 flex_parser_id_gtpu_dw_2; + u8 flex_parser_id_gtpu_first_ext_dw_0; + u8 flex_parser_ok_bits_supp; + u8 max_ft_level; + u16 roce_min_src_udp; + u8 sw_format_ver; + bool eswitch_manager; + bool rx_sw_owner; + bool tx_sw_owner; + bool fdb_sw_owner; + u8 rx_sw_owner_v2:1; + u8 tx_sw_owner_v2:1; + u8 fdb_sw_owner_v2:1; + struct mlx5dr_esw_caps esw_caps; + struct mlx5dr_vports vports; + bool prio_tag_required; + struct mlx5dr_roce_cap roce_caps; + u16 log_header_modify_argument_granularity; + u16 log_header_modify_argument_max_alloc; + bool support_modify_argument; + u8 is_ecpf:1; + u8 isolate_vl_tc:1; +}; + +enum mlx5dr_domain_nic_type { + DR_DOMAIN_NIC_TYPE_RX, + DR_DOMAIN_NIC_TYPE_TX, +}; + +struct mlx5dr_domain_rx_tx { + u64 drop_icm_addr; + u64 default_icm_addr; + enum mlx5dr_domain_nic_type type; + struct mutex mutex; /* protect rx/tx domain */ +}; + +struct mlx5dr_domain_info { + bool supp_sw_steering; + u32 max_inline_size; + u32 max_send_wr; + u32 max_log_sw_icm_sz; + u32 max_log_action_icm_sz; + u32 max_log_modify_hdr_pattern_icm_sz; + struct mlx5dr_domain_rx_tx rx; + struct mlx5dr_domain_rx_tx tx; + struct mlx5dr_cmd_caps caps; +}; + +struct mlx5dr_domain { + struct mlx5_core_dev *mdev; + u32 pdn; + struct mlx5_uars_page *uar; + enum mlx5dr_domain_type type; + refcount_t refcount; + struct mlx5dr_icm_pool *ste_icm_pool; + struct mlx5dr_icm_pool *action_icm_pool; + struct mlx5dr_send_info_pool *send_info_pool_rx; + struct mlx5dr_send_info_pool *send_info_pool_tx; + struct kmem_cache *chunks_kmem_cache; + struct kmem_cache *htbls_kmem_cache; + struct mlx5dr_ptrn_mgr *ptrn_mgr; + struct mlx5dr_arg_mgr *arg_mgr; + struct mlx5dr_send_ring *send_ring; + struct mlx5dr_domain_info info; + struct xarray csum_fts_xa; + struct mlx5dr_ste_ctx *ste_ctx; + struct list_head dbg_tbl_list; + struct mlx5dr_dbg_dump_info dump_info; + struct xarray definers_xa; + struct xarray peer_dmn_xa; + /* memory management statistics */ + u32 num_buddies[DR_ICM_TYPE_MAX]; +}; + +struct mlx5dr_table_rx_tx { + struct mlx5dr_ste_htbl *s_anchor; + struct mlx5dr_domain_rx_tx *nic_dmn; + u64 default_icm_addr; + struct list_head nic_matcher_list; +}; + +struct mlx5dr_table { + struct mlx5dr_domain *dmn; + struct mlx5dr_table_rx_tx rx; + struct mlx5dr_table_rx_tx tx; + u32 level; + u32 table_type; + u32 table_id; + u32 flags; + struct list_head matcher_list; + struct mlx5dr_action *miss_action; + refcount_t refcount; + struct list_head dbg_node; +}; + +struct mlx5dr_matcher_rx_tx { + struct mlx5dr_ste_htbl *s_htbl; + struct mlx5dr_ste_htbl *e_anchor; + struct mlx5dr_ste_build *ste_builder; + struct mlx5dr_ste_build ste_builder_arr[DR_RULE_IPV_MAX] + [DR_RULE_IPV_MAX] + [DR_RULE_MAX_STES]; + u8 num_of_builders; + u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX]; + u64 default_icm_addr; + struct mlx5dr_table_rx_tx *nic_tbl; + u32 prio; + struct list_head list_node; + u32 rules; +}; + +struct mlx5dr_matcher { + struct mlx5dr_table *tbl; + struct mlx5dr_matcher_rx_tx rx; + struct mlx5dr_matcher_rx_tx tx; + struct list_head list_node; /* Used for both matchers and dbg managing */ + u32 prio; + struct mlx5dr_match_param mask; + u8 match_criteria; + refcount_t refcount; + struct list_head dbg_rule_list; +}; + +struct mlx5dr_ste_action_modify_field { + u16 hw_field; + u8 start; + u8 end; + u8 l3_type; + u8 l4_type; +}; + +struct mlx5dr_ptrn_obj { + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u16 num_of_actions; + u32 index; + refcount_t refcount; + struct list_head list; +}; + +struct mlx5dr_arg_obj { + u32 obj_id; + u32 obj_offset; + struct list_head list_node; + u32 log_chunk_size; +}; + +struct mlx5dr_action_rewrite { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u16 num_of_actions; + u32 index; + u8 single_action_opt:1; + u8 allow_rx:1; + u8 allow_tx:1; + u8 modify_ttl:1; + struct mlx5dr_ptrn_obj *ptrn; + struct mlx5dr_arg_obj *arg; +}; + +struct mlx5dr_action_reformat { + struct mlx5dr_domain *dmn; + u32 id; + u32 size; + u8 param_0; + u8 param_1; +}; + +struct mlx5dr_action_sampler { + struct mlx5dr_domain *dmn; + u64 rx_icm_addr; + u64 tx_icm_addr; + u32 sampler_id; +}; + +struct mlx5dr_action_dest_tbl { + u8 is_fw_tbl:1; + union { + struct mlx5dr_table *tbl; + struct { + struct mlx5dr_domain *dmn; + u32 id; + u32 group_id; + enum fs_flow_table_type type; + u64 rx_icm_addr; + u64 tx_icm_addr; + struct mlx5dr_action **ref_actions; + u32 num_of_ref_actions; + } fw_tbl; + }; +}; + +struct mlx5dr_action_range { + struct mlx5dr_domain *dmn; + struct mlx5dr_action *hit_tbl_action; + struct mlx5dr_action *miss_tbl_action; + u32 definer_id; + u32 min; + u32 max; +}; + +struct mlx5dr_action_ctr { + u32 ctr_id; + u32 offset; +}; + +struct mlx5dr_action_vport { + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_vport_cap *caps; +}; + +struct mlx5dr_action_push_vlan { + u32 vlan_hdr; /* tpid_pcp_dei_vid */ +}; + +struct mlx5dr_action_flow_tag { + u32 flow_tag; +}; + +struct mlx5dr_rule_action_member { + struct mlx5dr_action *action; + struct list_head list; +}; + +struct mlx5dr_action_aso_flow_meter { + struct mlx5dr_domain *dmn; + u32 obj_id; + u32 offset; + u8 dest_reg_id; + u8 init_color; +}; + +struct mlx5dr_action { + enum mlx5dr_action_type action_type; + refcount_t refcount; + + union { + void *data; + struct mlx5dr_action_rewrite *rewrite; + struct mlx5dr_action_reformat *reformat; + struct mlx5dr_action_sampler *sampler; + struct mlx5dr_action_dest_tbl *dest_tbl; + struct mlx5dr_action_ctr *ctr; + struct mlx5dr_action_vport *vport; + struct mlx5dr_action_push_vlan *push_vlan; + struct mlx5dr_action_flow_tag *flow_tag; + struct mlx5dr_action_aso_flow_meter *aso; + struct mlx5dr_action_range *range; + }; +}; + +enum mlx5dr_connect_type { + CONNECT_HIT = 1, + CONNECT_MISS = 2, +}; + +struct mlx5dr_htbl_connect_info { + enum mlx5dr_connect_type type; + union { + struct mlx5dr_ste_htbl *hit_next_htbl; + u64 miss_icm_addr; + }; +}; + +struct mlx5dr_rule_rx_tx { + struct mlx5dr_matcher_rx_tx *nic_matcher; + struct mlx5dr_ste *last_rule_ste; +}; + +struct mlx5dr_rule { + struct mlx5dr_matcher *matcher; + struct mlx5dr_rule_rx_tx rx; + struct mlx5dr_rule_rx_tx tx; + struct list_head rule_actions_list; + struct list_head dbg_node; + u32 flow_source; +}; + +void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule, + struct mlx5dr_ste *ste, + bool force); +int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr, + struct mlx5dr_ste *curr_ste, + int *num_of_stes); + +struct mlx5dr_icm_chunk { + struct mlx5dr_icm_buddy_mem *buddy_mem; + + /* indicates the index of this chunk in the whole memory, + * used for deleting the chunk from the buddy + */ + unsigned int seg; + enum mlx5dr_icm_chunk_size size; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + u8 *hw_ste_arr; + struct list_head *miss_list; +}; + +static inline void mlx5dr_domain_nic_lock(struct mlx5dr_domain_rx_tx *nic_dmn) +{ + mutex_lock(&nic_dmn->mutex); +} + +static inline void mlx5dr_domain_nic_unlock(struct mlx5dr_domain_rx_tx *nic_dmn) +{ + mutex_unlock(&nic_dmn->mutex); +} + +static inline void mlx5dr_domain_lock(struct mlx5dr_domain *dmn) +{ + mlx5dr_domain_nic_lock(&dmn->info.rx); + mlx5dr_domain_nic_lock(&dmn->info.tx); +} + +static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn) +{ + mlx5dr_domain_nic_unlock(&dmn->info.tx); + mlx5dr_domain_nic_unlock(&dmn->info.rx); +} + +int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher); +int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher); + +int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + enum mlx5dr_ipv outer_ipv, + enum mlx5dr_ipv inner_ipv); + +u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk); +u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk); +u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste); + +struct mlx5dr_ste_htbl *mlx5dr_icm_pool_alloc_htbl(struct mlx5dr_icm_pool *pool); +void mlx5dr_icm_pool_free_htbl(struct mlx5dr_icm_pool *pool, struct mlx5dr_ste_htbl *htbl); + +static inline int +mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type) +{ + if (icm_type == DR_ICM_TYPE_STE) + return DR_STE_SIZE; + + return DR_MODIFY_ACTION_SIZE; +} + +static inline u32 +mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size) +{ + return 1 << chunk_size; +} + +static inline int +mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size, + enum mlx5dr_icm_type icm_type) +{ + int num_of_entries; + int entry_size; + + entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(icm_type); + num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + + return entry_size * num_of_entries; +} + +static inline int +mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl) +{ + int num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk->size); + + /* Threshold is 50%, one is added to table of size 1 */ + return (num_of_entries + 1) / 2; +} + +static inline bool +mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl) +{ + if (htbl->chunk->size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask) + return false; + + return true; +} + +struct mlx5dr_cmd_vport_cap * +mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport); + +struct mlx5dr_cmd_query_flow_table_details { + u8 status; + u8 level; + u64 sw_owner_icm_root_1; + u64 sw_owner_icm_root_0; +}; + +struct mlx5dr_cmd_create_flow_table_attr { + u32 table_type; + u16 uid; + u64 icm_addr_rx; + u64 icm_addr_tx; + u8 level; + bool sw_owner; + bool term_tbl; + bool decap_en; + bool reformat_en; +}; + +/* internal API functions */ +int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_caps *caps); +int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, + u64 *icm_address_rx, + u64 *icm_address_tx); +int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, + bool other_vport, u16 vport_number, u16 *gvmi); +int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, + struct mlx5dr_esw_caps *caps); +int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev, + u32 sampler_id, + u64 *rx_icm_addr, + u64 *tx_icm_addr); +int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev); +int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id, + u32 modify_header_id, + u16 vport_id); +int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id); +int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev, + u32 table_type, + u8 num_of_actions, + u64 *actions, + u32 *modify_header_id); +int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev, + u32 modify_header_id); +int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 *group_id); +int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, + u32 table_type, + u32 table_id, + u32 group_id); +int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, + struct mlx5dr_cmd_create_flow_table_attr *attr, + u64 *fdb_rx_icm_addr, + u32 *table_id); +int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, + u32 table_id, + u32 table_type); +int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev, + enum fs_flow_table_type type, + u32 table_id, + struct mlx5dr_cmd_query_flow_table_details *output); +int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + enum mlx5_reformat_ctx_type rt, + u8 reformat_param_0, + u8 reformat_param_1, + size_t reformat_size, + void *reformat_data, + u32 *reformat_id); +void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, + u32 reformat_id); +int mlx5dr_cmd_create_definer(struct mlx5_core_dev *mdev, + u16 format_id, + u8 *dw_selectors, + u8 *byte_selectors, + u8 *match_mask, + u32 *definer_id); +void mlx5dr_cmd_destroy_definer(struct mlx5_core_dev *mdev, + u32 definer_id); + +struct mlx5dr_cmd_gid_attr { + u8 gid[16]; + u8 mac[6]; + u32 roce_ver; +}; + +int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, + u16 index, struct mlx5dr_cmd_gid_attr *attr); + +int mlx5dr_cmd_create_modify_header_arg(struct mlx5_core_dev *dev, + u16 log_obj_range, u32 pd, + u32 *obj_id); +void mlx5dr_cmd_destroy_modify_header_arg(struct mlx5_core_dev *dev, + u32 obj_id); + +struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, + enum mlx5dr_icm_type icm_type); +void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool); + +struct mlx5dr_icm_chunk * +mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size); +void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk); + +void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p, u32 ste_size); +int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, + struct mlx5dr_domain_rx_tx *nic_dmn, + struct mlx5dr_ste_htbl *htbl, + struct mlx5dr_htbl_connect_info *connect_info, + bool update_hw_ste); +void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx, + u16 gvmi, + enum mlx5dr_domain_nic_type nic_type, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, + struct mlx5dr_htbl_connect_info *connect_info); +void mlx5dr_ste_copy_param(u8 match_criteria, + struct mlx5dr_match_param *set_param, + struct mlx5dr_match_parameters *mask, + bool clear); + +struct mlx5dr_qp { + struct mlx5_core_dev *mdev; + struct mlx5_wq_qp wq; + struct mlx5_uars_page *uar; + struct mlx5_wq_ctrl wq_ctrl; + u32 qpn; + struct { + unsigned int head; + unsigned int pc; + unsigned int cc; + unsigned int size; + unsigned int *wqe_head; + unsigned int wqe_cnt; + } sq; + struct { + unsigned int pc; + unsigned int cc; + unsigned int size; + unsigned int wqe_cnt; + } rq; + int max_inline_data; +}; + +struct mlx5dr_cq { + struct mlx5_core_dev *mdev; + struct mlx5_cqwq wq; + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_core_cq mcq; + struct mlx5dr_qp *qp; +}; + +struct mlx5dr_mr { + struct mlx5_core_dev *mdev; + u32 mkey; + dma_addr_t dma_addr; + void *addr; + size_t size; +}; + +struct mlx5dr_send_ring { + struct mlx5dr_cq *cq; + struct mlx5dr_qp *qp; + struct mlx5dr_mr *mr; + /* How much wqes are waiting for completion */ + u32 pending_wqe; + /* Signal request per this trash hold value */ + u16 signal_th; + /* Each post_send_size less than max_post_send_size */ + u32 max_post_send_size; + /* manage the send queue */ + u32 tx_head; + void *buf; + u32 buf_size; + u8 *sync_buff; + struct mlx5dr_mr *sync_mr; + spinlock_t lock; /* Protect the data path of the send ring */ + bool err_state; /* send_ring is not usable in err state */ +}; + +int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); +void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, + struct mlx5dr_send_ring *send_ring); +int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn); +int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, + struct mlx5dr_ste *ste, + u8 *data, + u16 size, + u16 offset); +int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *formatted_ste, u8 *mask); +int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, + struct mlx5dr_ste_htbl *htbl, + u8 *ste_init_data, + bool update_hw_ste); +int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, + struct mlx5dr_action *action); +int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn, + struct mlx5dr_icm_chunk *chunk, + u16 num_of_actions, + u8 *data); +int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id, + u16 num_of_actions, u8 *actions_data); + +int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn); +void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn); +struct mlx5dr_ste_send_info *mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn, + enum mlx5dr_domain_nic_type nic_type); +void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info); + +struct mlx5dr_cmd_ft_info { + u32 id; + u16 vport; + enum fs_flow_table_type type; +}; + +struct mlx5dr_cmd_flow_destination_hw_info { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + u32 ft_num; + u32 ft_id; + u32 counter_id; + u32 sampler_id; + struct { + u16 num; + u16 vhca_id; + u32 reformat_id; + u8 flags; + } vport; + }; +}; + +struct mlx5dr_cmd_fte_info { + u32 dests_size; + u32 index; + struct mlx5_flow_context flow_context; + u32 *val; + struct mlx5_flow_act action; + struct mlx5dr_cmd_flow_destination_hw_info *dest_arr; + bool ignore_flow_level; +}; + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte); + +bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps); + +struct mlx5dr_fw_recalc_cs_ft { + u64 rx_icm_addr; + u32 table_id; + u32 group_id; + u32 modify_hdr_id; +}; + +struct mlx5dr_fw_recalc_cs_ft * +mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u16 vport_num); +void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, + struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft); +int mlx5dr_domain_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, + u16 vport_num, + u64 *rx_icm_addr); +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id, + bool ignore_flow_level, + u32 flow_source); +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, + u32 group_id); + +static inline bool mlx5dr_is_fw_table(struct mlx5_flow_table *ft) +{ + return !ft->fs_dr_table.dr_table; +} + +static inline bool mlx5dr_supp_match_ranges(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_GEN(dev, steering_format_version) >= + MLX5_STEERING_FORMAT_CONNECTX_6DX) && + (MLX5_CAP_GEN_64(dev, match_definer_format_supported) & + (1ULL << MLX5_IFC_DEFINER_FORMAT_ID_SELECT)); +} + +bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn); +struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn); +void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr); +struct mlx5dr_ptrn_obj *mlx5dr_ptrn_cache_get_pattern(struct mlx5dr_ptrn_mgr *mgr, + u16 num_of_actions, u8 *data); +void mlx5dr_ptrn_cache_put_pattern(struct mlx5dr_ptrn_mgr *mgr, + struct mlx5dr_ptrn_obj *pattern); +struct mlx5dr_arg_mgr *mlx5dr_arg_mgr_create(struct mlx5dr_domain *dmn); +void mlx5dr_arg_mgr_destroy(struct mlx5dr_arg_mgr *mgr); +struct mlx5dr_arg_obj *mlx5dr_arg_get_obj(struct mlx5dr_arg_mgr *mgr, + u16 num_of_actions, + u8 *data); +void mlx5dr_arg_put_obj(struct mlx5dr_arg_mgr *mgr, + struct mlx5dr_arg_obj *arg_obj); +u32 mlx5dr_arg_get_obj_id(struct mlx5dr_arg_obj *arg_obj); + +#endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c new file mode 100644 index 0000000000..14f6df88b1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -0,0 +1,873 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" +#include "mlx5dr.h" +#include "fs_dr.h" +#include "dr_types.h" + +static bool dr_is_fw_term_table(struct mlx5_flow_table *ft) +{ + if (ft->flags & MLX5_FLOW_TABLE_TERMINATION) + return true; + + return false; +} + +static int mlx5_cmd_dr_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn, + disconnect); +} + +static int set_miss_action(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_action *old_miss_action; + struct mlx5dr_action *action = NULL; + struct mlx5dr_table *next_tbl; + int err; + + next_tbl = next_ft ? next_ft->fs_dr_table.dr_table : NULL; + if (next_tbl) { + action = mlx5dr_action_create_dest_table(next_tbl); + if (!action) + return -EINVAL; + } + old_miss_action = ft->fs_dr_table.miss_action; + err = mlx5dr_table_set_miss_action(ft->fs_dr_table.dr_table, action); + if (err && action) { + err = mlx5dr_action_destroy(action); + if (err) + mlx5_core_err(ns->dev, + "Failed to destroy action (%d)\n", err); + action = NULL; + } + ft->fs_dr_table.miss_action = action; + if (old_miss_action) { + err = mlx5dr_action_destroy(old_miss_action); + if (err) + mlx5_core_err(ns->dev, "Failed to destroy action (%d)\n", + err); + } + + return err; +} + +static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft) +{ + struct mlx5dr_table *tbl; + u32 flags; + int err; + + if (dr_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, + ft_attr, + next_ft); + flags = ft->flags; + /* turn off encap/decap if not supported for sw-str by fw */ + if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported)) + flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags, + ft_attr->uid); + if (!tbl) { + mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); + return -EINVAL; + } + + ft->fs_dr_table.dr_table = tbl; + ft->id = mlx5dr_table_get_id(tbl); + + if (next_ft) { + err = set_miss_action(ns, ft, next_ft); + if (err) { + mlx5dr_table_destroy(tbl); + ft->fs_dr_table.dr_table = NULL; + return err; + } + } + + ft->max_fte = INT_MAX; + + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + struct mlx5dr_action *action = ft->fs_dr_table.miss_action; + int err; + + if (dr_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); + + err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n", + err); + return err; + } + if (action) { + err = mlx5dr_action_destroy(action); + if (err) { + mlx5_core_err(ns->dev, "Failed to destroy action(%d)\n", + err); + return err; + } + } + + return err; +} + +static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + if (dr_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft); + + return set_miss_action(ns, ft, next_ft); +} + +static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 *in, + struct mlx5_flow_group *fg) +{ + struct mlx5dr_matcher *matcher; + u32 priority = MLX5_GET(create_flow_group_in, in, + start_flow_index); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + in, + match_criteria_enable); + struct mlx5dr_match_parameters mask; + + if (dr_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, + fg); + + mask.match_buf = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + mask.match_sz = sizeof(fg->mask.match_criteria); + + matcher = mlx5dr_matcher_create(ft->fs_dr_table.dr_table, + priority, + match_criteria_enable, + &mask); + if (!matcher) { + mlx5_core_err(ns->dev, "Failed creating matcher\n"); + return -EINVAL; + } + + fg->fs_dr_matcher.dr_matcher = matcher; + return 0; +} + +static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + if (dr_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg); + + return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher); +} + +static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, dest_attr->vport.num, + dest_attr->vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID, + dest_attr->vport.vhca_id); +} + +static struct mlx5dr_action *create_uplink_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, MLX5_VPORT_UPLINK, 1, + dest_attr->vport.vhca_id); +} + +static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_table *dest_ft = dst->dest_attr.ft; + + if (mlx5dr_is_fw_table(dest_ft)) + return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft); + return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table); +} + +static struct mlx5dr_action *create_range_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + return mlx5dr_action_create_dest_match_range(domain, + dst->dest_attr.range.field, + dst->dest_attr.range.hit_ft, + dst->dest_attr.range.miss_ft, + dst->dest_attr.range.min, + dst->dest_attr.range.max); +} + +static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain, + struct mlx5_fs_vlan *vlan) +{ + u16 n_ethtype = vlan->ethtype; + u8 prio = vlan->prio; + u16 vid = vlan->vid; + u32 vlan_hdr; + + vlan_hdr = (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid; + return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr)); +} + +static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) +{ + return (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; +} + +/* We want to support a rule with 32 destinations, which means we need to + * account for 32 destinations plus usually a counter plus one more action + * for a multi-destination flow table. + */ +#define MLX5_FLOW_CONTEXT_ACTION_MAX 34 +static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action_dest *term_actions; + struct mlx5dr_match_parameters params; + struct mlx5_core_dev *dev = ns->dev; + struct mlx5dr_action **fs_dr_actions; + struct mlx5dr_action *tmp_action; + struct mlx5dr_action **actions; + bool delay_encap_set = false; + struct mlx5dr_rule *rule; + struct mlx5_flow_rule *dst; + int fs_dr_num_actions = 0; + int num_term_actions = 0; + int num_actions = 0; + size_t match_sz; + int err = 0; + int i; + + if (dr_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); + + actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), + GFP_KERNEL); + if (!actions) { + err = -ENOMEM; + goto out_err; + } + + fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*fs_dr_actions), GFP_KERNEL); + if (!fs_dr_actions) { + err = -ENOMEM; + goto free_actions_alloc; + } + + term_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*term_actions), GFP_KERNEL); + if (!term_actions) { + err = -ENOMEM; + goto free_fs_dr_actions_alloc; + } + + match_sz = sizeof(fte->val); + + /* Drop reformat action bit if destination vport set with reformat */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + if (!contain_vport_reformat_action(dst)) + continue; + + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + break; + } + } + + /* The order of the actions are must to be keep, only the following + * order is supported by SW steering: + * TX: modify header -> push vlan -> encap + * RX: decap -> pop vlan -> modify header + */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + enum mlx5dr_action_reformat_type decap_type = + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2; + + tmp_action = mlx5dr_action_create_packet_reformat(domain, + decap_type, + 0, 0, 0, + NULL); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + bool is_decap; + + if (fte->action.pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { + err = -EINVAL; + mlx5dr_err(domain, "FW-owned reformat can't be used in SW rule\n"); + goto free_actions; + } + + is_decap = fte->action.pkt_reformat->reformat_type == + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + + if (is_decap) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + else + delay_encap_set = true; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) { + tmp_action = + mlx5dr_action_create_pop_vlan(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + actions[num_actions++] = + fte->action.modify_hdr->action.dr_action; + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[0]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + tmp_action = create_action_push_vlan(domain, &fte->action.vlan[1]); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (delay_encap_set) + actions[num_actions++] = + fte->action.pkt_reformat->action.dr_action; + + /* The order of the actions below is not important */ + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + tmp_action = mlx5dr_action_create_drop(); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + } + + if (fte->flow_context.flow_tag) { + tmp_action = + mlx5dr_action_create_tag(fte->flow_context.flow_tag); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + enum mlx5_flow_destination_type type = dst->dest_attr.type; + u32 id; + + if (fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_term_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + tmp_action = create_ft_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + tmp_action = type == MLX5_FLOW_DESTINATION_TYPE_VPORT ? + create_vport_action(domain, dst) : + create_uplink_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions].dest = tmp_action; + + if (dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + term_actions[num_term_actions].reformat = + dst->dest_attr.vport.pkt_reformat->action.dr_action; + + num_term_actions++; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = dst->dest_attr.ft_num; + tmp_action = mlx5dr_action_create_dest_table_num(domain, + id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + id = dst->dest_attr.sampler_id; + tmp_action = mlx5dr_action_create_flow_sampler(domain, + id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_RANGE: + tmp_action = create_range_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; + break; + default: + err = -EOPNOTSUPP; + goto free_actions; + } + } + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + list_for_each_entry(dst, &fte->node.children, node.list) { + u32 id; + + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + + id = dst->dest_attr.counter_id; + tmp_action = + mlx5dr_action_create_flow_counter(id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + if (fte->action.exe_aso.type != MLX5_EXE_ASO_FLOW_METER) { + err = -EOPNOTSUPP; + goto free_actions; + } + + tmp_action = + mlx5dr_action_create_aso(domain, + fte->action.exe_aso.object_id, + fte->action.exe_aso.return_reg_id, + fte->action.exe_aso.type, + fte->action.exe_aso.flow_meter.init_color, + fte->action.exe_aso.flow_meter.meter_idx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + params.match_sz = match_sz; + params.match_buf = (u64 *)fte->val; + if (num_term_actions == 1) { + if (term_actions->reformat) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + actions[num_actions++] = term_actions->reformat; + } + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + actions[num_actions++] = term_actions->dest; + } else if (num_term_actions > 1) { + bool ignore_flow_level = + !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + u32 flow_source = fte->flow_context.flow_source; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + fs_dr_num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, + term_actions, + num_term_actions, + ignore_flow_level, + flow_source); + if (!tmp_action) { + err = -EOPNOTSUPP; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + + rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, + ¶ms, + num_actions, + actions, + fte->flow_context.flow_source); + if (!rule) { + err = -EINVAL; + goto free_actions; + } + + kfree(term_actions); + kfree(actions); + + fte->fs_dr_rule.dr_rule = rule; + fte->fs_dr_rule.num_actions = fs_dr_num_actions; + fte->fs_dr_rule.dr_actions = fs_dr_actions; + + return 0; + +free_actions: + /* Free in reverse order to handle action dependencies */ + for (i = fs_dr_num_actions - 1; i >= 0; i--) + if (!IS_ERR_OR_NULL(fs_dr_actions[i])) + mlx5dr_action_destroy(fs_dr_actions[i]); + + kfree(term_actions); +free_fs_dr_actions_alloc: + kfree(fs_dr_actions); +free_actions_alloc: + kfree(actions); +out_err: + mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); + return err; +} + +static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; + int dr_reformat; + + switch (params->type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2; + break; + case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + dr_reformat = DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2; + break; + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3; + break; + case MLX5_REFORMAT_TYPE_INSERT_HDR: + dr_reformat = DR_ACTION_REFORMAT_TYP_INSERT_HDR; + break; + case MLX5_REFORMAT_TYPE_REMOVE_HDR: + dr_reformat = DR_ACTION_REFORMAT_TYP_REMOVE_HDR; + break; + default: + mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n", + params->type); + return -EOPNOTSUPP; + } + + action = mlx5dr_action_create_packet_reformat(dr_domain, + dr_reformat, + params->param_0, + params->param_1, + params->size, + params->data); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n"); + return -EINVAL; + } + + pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_SW; + pkt_reformat->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ + mlx5dr_action_destroy(pkt_reformat->action.dr_action); +} + +static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5dr_domain *dr_domain = ns->fs_dr_domain.dr_domain; + struct mlx5dr_action *action; + size_t actions_sz; + + actions_sz = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * + num_actions; + action = mlx5dr_action_create_modify_header(dr_domain, 0, + actions_sz, + modify_actions); + if (!action) { + mlx5_core_err(ns->dev, "Failed allocating modify-header action\n"); + return -EINVAL; + } + + modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW; + modify_hdr->action.dr_action = action; + + return 0; +} + +static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ + mlx5dr_action_destroy(modify_hdr->action.dr_action); +} + +static int +mlx5_cmd_dr_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_dr_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + struct mlx5_fs_dr_rule *rule = &fte->fs_dr_rule; + int err; + int i; + + if (dr_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte); + + err = mlx5dr_rule_destroy(rule->dr_rule); + if (err) + return err; + + /* Free in reverse order to handle action dependencies */ + for (i = rule->num_actions - 1; i >= 0; i--) + if (!IS_ERR_OR_NULL(rule->dr_actions[i])) + mlx5dr_action_destroy(rule->dr_actions[i]); + + kfree(rule->dr_actions); + return 0; +} + +static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + int modify_mask, + struct fs_fte *fte) +{ + struct fs_fte fte_tmp = {}; + int ret; + + if (dr_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte); + + /* Backup current dr rule details */ + fte_tmp.fs_dr_rule = fte->fs_dr_rule; + memset(&fte->fs_dr_rule, 0, sizeof(struct mlx5_fs_dr_rule)); + + /* First add the new updated rule, then delete the old rule */ + ret = mlx5_cmd_dr_create_fte(ns, ft, group, fte); + if (ret) + goto restore_fte; + + ret = mlx5_cmd_dr_delete_fte(ns, ft, &fte_tmp); + WARN_ONCE(ret, "dr update fte duplicate rule deletion failed\n"); + return ret; + +restore_fte: + fte->fs_dr_rule = fte_tmp.fs_dr_rule; + return ret; +} + +static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id) +{ + struct mlx5dr_domain *peer_domain = NULL; + + if (peer_ns) + peer_domain = peer_ns->fs_dr_domain.dr_domain; + mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, + peer_domain, peer_vhca_id); + return 0; +} + +static int mlx5_cmd_dr_create_ns(struct mlx5_flow_root_namespace *ns) +{ + ns->fs_dr_domain.dr_domain = + mlx5dr_domain_create(ns->dev, + MLX5DR_DOMAIN_TYPE_FDB); + if (!ns->fs_dr_domain.dr_domain) { + mlx5_core_err(ns->dev, "Failed to create dr flow namespace\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + return mlx5dr_domain_destroy(ns->fs_dr_domain.dr_domain); +} + +static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + u32 steering_caps = 0; + + if (ft_type != FS_FT_FDB || + MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5) + return 0; + + steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX; + steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX; + + if (mlx5dr_supp_match_ranges(ns->dev)) + steering_caps |= MLX5_FLOW_STEERING_CAP_MATCH_RANGES; + + return steering_caps; +} + +int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +{ + switch (pkt_reformat->reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + case MLX5_REFORMAT_TYPE_INSERT_HDR: + return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->action.dr_action); + } + return -EOPNOTSUPP; +} + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return mlx5dr_is_supported(dev); +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = { + .create_flow_table = mlx5_cmd_dr_create_flow_table, + .destroy_flow_table = mlx5_cmd_dr_destroy_flow_table, + .modify_flow_table = mlx5_cmd_dr_modify_flow_table, + .create_flow_group = mlx5_cmd_dr_create_flow_group, + .destroy_flow_group = mlx5_cmd_dr_destroy_flow_group, + .create_fte = mlx5_cmd_dr_create_fte, + .update_fte = mlx5_cmd_dr_update_fte, + .delete_fte = mlx5_cmd_dr_delete_fte, + .update_root_ft = mlx5_cmd_dr_update_root_ft, + .packet_reformat_alloc = mlx5_cmd_dr_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_dr_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc, + .create_match_definer = mlx5_cmd_dr_create_match_definer, + .destroy_match_definer = mlx5_cmd_dr_destroy_match_definer, + .set_peer = mlx5_cmd_dr_set_peer, + .create_ns = mlx5_cmd_dr_create_ns, + .destroy_ns = mlx5_cmd_dr_destroy_ns, + .get_capabilities = mlx5_cmd_dr_get_capabilities, +}; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return &mlx5_flow_cmds_dr; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h new file mode 100644 index 0000000000..99a3b2eff6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2019 Mellanox Technologies + */ + +#ifndef _MLX5_FS_DR_ +#define _MLX5_FS_DR_ + +#include "mlx5dr.h" + +struct mlx5_flow_root_namespace; +struct fs_fte; + +struct mlx5_fs_dr_action { + struct mlx5dr_action *dr_action; +}; + +struct mlx5_fs_dr_rule { + struct mlx5dr_rule *dr_rule; + /* Only actions created by fs_dr */ + struct mlx5dr_action **dr_actions; + int num_actions; +}; + +struct mlx5_fs_dr_domain { + struct mlx5dr_domain *dr_domain; +}; + +struct mlx5_fs_dr_matcher { + struct mlx5dr_matcher *dr_matcher; +}; + +struct mlx5_fs_dr_table { + struct mlx5dr_table *dr_table; + struct mlx5dr_action *miss_action; +}; + +#ifdef CONFIG_MLX5_SW_STEERING + +bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); + +int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); + +#else + +static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) +{ + return NULL; +} + +static inline u32 mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +{ + return 0; +} + +static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) +{ + return false; +} + +#endif /* CONFIG_MLX5_SW_STEERING */ +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h new file mode 100644 index 0000000000..fb078fa0f0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -0,0 +1,603 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef MLX5_IFC_DR_H +#define MLX5_IFC_DR_H + +enum { + MLX5DR_STE_LU_TYPE_DONT_CARE = 0x0f, +}; + +struct mlx5_ifc_ste_general_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 reserved_at_60[0xa0]; + u8 tag_value[0x60]; + u8 bit_mask[0x60]; +}; + +struct mlx5_ifc_ste_sx_transmit_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_5c[0x2]; + u8 next_table_rank[0x2]; + + u8 sx_wire[0x1]; + u8 sx_func_lb[0x1]; + u8 sx_sniffer[0x1]; + u8 sx_wire_enable[0x1]; + u8 sx_func_lb_enable[0x1]; + u8 sx_sniffer_enable[0x1]; + u8 action_type[0x3]; + u8 reserved_at_69[0x1]; + u8 action_description[0x6]; + u8 gvmi[0x10]; + + u8 encap_pointer_vlan_data[0x20]; + + u8 loopback_syndome_en[0x8]; + u8 loopback_syndome[0x8]; + u8 counter_trigger[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 go_back[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_rx_steering_mult_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 member_count[0x10]; + u8 gvmi[0x10]; + + u8 qp_list_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_modify_packet_bits { + u8 entry_type[0x4]; + u8 reserved_at_4[0x4]; + u8 entry_sub_type[0x8]; + u8 byte_mask[0x10]; + + u8 next_table_base_63_48[0x10]; + u8 next_lu_type[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 linear_hash_enable[0x1]; + u8 reserved_at_[0x2]; + u8 next_table_rank[0x2]; + + u8 number_of_re_write_actions[0x10]; + u8 gvmi[0x10]; + + u8 header_re_write_actions_pointer[0x20]; + + u8 reserved_at_a0[0x1]; + u8 tunneling_action[0x3]; + u8 action_description[0x4]; + u8 reserved_at_a8[0x8]; + u8 counter_trigger_15_0[0x10]; + + u8 miss_address_63_48[0x10]; + u8 counter_trigger_23_16[0x08]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 learning_point[0x1]; + u8 fail_on_error[0x1]; + u8 match_polarity[0x1]; + u8 mask_mode[0x1]; + u8 miss_rank[0x2]; +}; + +struct mlx5_ifc_ste_eth_l2_src_bits { + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 qp_type[0x2]; + u8 ethertype_filter[0x1]; + u8 reserved_at_43[0x1]; + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 reserved_at_48[0x4]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_52[0x2]; + u8 first_vlan_id[0xc]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 reserved_at_68[0x4]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_src_dst_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 sx_sniffer[0x1]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 port[0x1]; + u8 l3_type[0x2]; + u8 reserved_at_66[0x6]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_bits { + u8 destination_address[0x20]; + + u8 source_address[0x20]; + + u8 source_port[0x10]; + u8 destination_port[0x10]; + + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_62[0x2]; + u8 reserved_at_64[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 dscp[0x6]; + u8 reserved_at_76[0x2]; + u8 protocol[0x8]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_dst_bits { + u8 dst_ip_127_96[0x20]; + + u8 dst_ip_95_64[0x20]; + + u8 dst_ip_63_32[0x20]; + + u8 dst_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l2_tnl_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 l2_tunneling_network_id[0x20]; + + u8 ip_fragmented[0x1]; + u8 tcp_syn[0x1]; + u8 encp_type[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 reserved_at_6c[0x3]; + u8 gre_key_flag[0x1]; + u8 first_vlan_qualifier[0x2]; + u8 reserved_at_72[0x2]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv6_src_bits { + u8 src_ip_127_96[0x20]; + + u8 src_ip_95_64[0x20]; + + u8 src_ip_63_32[0x20]; + + u8 src_ip_31_0[0x20]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_misc_bits { + u8 version[0x4]; + u8 ihl[0x4]; + u8 reserved_at_8[0x8]; + u8 total_length[0x10]; + + u8 identification[0x10]; + u8 flags[0x3]; + u8 fragment_offset[0xd]; + + u8 time_to_live[0x8]; + u8 reserved_at_48[0x8]; + u8 checksum[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_eth_l4_bits { + u8 fragmented[0x1]; + u8 first_fragment[0x1]; + u8 reserved_at_2[0x6]; + u8 protocol[0x8]; + u8 dst_port[0x10]; + + u8 ipv6_version[0x4]; + u8 reserved_at_24[0x1]; + u8 ecn[0x2]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 src_port[0x10]; + + u8 ipv6_payload_length[0x10]; + u8 ipv6_hop_limit[0x8]; + u8 dscp[0x6]; + u8 reserved_at_5e[0x2]; + + u8 tcp_data_offset[0x4]; + u8 reserved_at_64[0x8]; + u8 flow_label[0x14]; +}; + +struct mlx5_ifc_ste_eth_l4_misc_bits { + u8 checksum[0x10]; + u8 length[0x10]; + + u8 seq_num[0x20]; + + u8 ack_num[0x20]; + + u8 urgent_pointer[0x10]; + u8 window_size[0x10]; +}; + +struct mlx5_ifc_ste_mpls_bits { + u8 mpls0_label[0x14]; + u8 mpls0_exp[0x3]; + u8 mpls0_s_bos[0x1]; + u8 mpls0_ttl[0x8]; + + u8 mpls1_label[0x20]; + + u8 mpls2_label[0x20]; + + u8 reserved_at_60[0x16]; + u8 mpls4_s_bit[0x1]; + u8 mpls4_qualifier[0x1]; + u8 mpls3_s_bit[0x1]; + u8 mpls3_qualifier[0x1]; + u8 mpls2_s_bit[0x1]; + u8 mpls2_qualifier[0x1]; + u8 mpls1_s_bit[0x1]; + u8 mpls1_qualifier[0x1]; + u8 mpls0_s_bit[0x1]; + u8 mpls0_qualifier[0x1]; +}; + +struct mlx5_ifc_ste_register_0_bits { + u8 register_0_h[0x20]; + + u8 register_0_l[0x20]; + + u8 register_1_h[0x20]; + + u8 register_1_l[0x20]; +}; + +struct mlx5_ifc_ste_register_1_bits { + u8 register_2_h[0x20]; + + u8 register_2_l[0x20]; + + u8 register_3_h[0x20]; + + u8 register_3_l[0x20]; +}; + +struct mlx5_ifc_ste_gre_bits { + u8 gre_c_present[0x1]; + u8 reserved_at_30[0x1]; + u8 gre_k_present[0x1]; + u8 gre_s_present[0x1]; + u8 strict_src_route[0x1]; + u8 recur[0x3]; + u8 flags[0x5]; + u8 version[0x3]; + u8 gre_protocol[0x10]; + + u8 checksum[0x10]; + u8 offset[0x10]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 seq_num[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_0_bits { + u8 flex_parser_3[0x20]; + + u8 flex_parser_2[0x20]; + + u8 flex_parser_1[0x20]; + + u8 flex_parser_0[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_1_bits { + u8 flex_parser_7[0x20]; + + u8 flex_parser_6[0x20]; + + u8 flex_parser_5[0x20]; + + u8 flex_parser_4[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_ok_bits { + u8 flex_parser_3[0x20]; + u8 flex_parser_2[0x20]; + u8 flex_parsers_ok[0x8]; + u8 reserved_at_48[0x18]; + u8 flex_parser_0[0x20]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_bits { + u8 flex_parser_tunneling_header_63_32[0x20]; + + u8 flex_parser_tunneling_header_31_0[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_vxlan_gpe_bits { + u8 outer_vxlan_gpe_flags[0x8]; + u8 reserved_at_8[0x10]; + u8 outer_vxlan_gpe_next_protocol[0x8]; + + u8 outer_vxlan_gpe_vni[0x18]; + u8 reserved_at_38[0x8]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_geneve_bits { + u8 reserved_at_0[0x2]; + u8 geneve_opt_len[0x6]; + u8 geneve_oam[0x1]; + u8 reserved_at_9[0x7]; + u8 geneve_protocol_type[0x10]; + + u8 geneve_vni[0x18]; + u8 reserved_at_38[0x8]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits { + u8 reserved_at_0[0x5]; + u8 gtpu_msg_flags[0x3]; + u8 gtpu_msg_type[0x8]; + u8 reserved_at_10[0x10]; + + u8 gtpu_teid[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_tunnel_header_bits { + u8 tunnel_header_0[0x20]; + + u8 tunnel_header_1[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_ste_general_purpose_bits { + u8 general_purpose_lookup_field[0x20]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_src_gvmi_qp_bits { + u8 loopback_syndrome[0x8]; + u8 reserved_at_8[0x8]; + u8 source_gvmi[0x10]; + + u8 reserved_at_20[0x5]; + u8 force_lb[0x1]; + u8 functional_lb[0x1]; + u8 source_is_requestor[0x1]; + u8 source_qp[0x18]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_l2_hdr_bits { + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 smac_47_32[0x10]; + + u8 smac_31_0[0x20]; + + u8 ethertype[0x10]; + u8 vlan_type[0x10]; + + u8 vlan[0x10]; + u8 reserved_at_90[0x10]; +}; + +/* Both HW set and HW add share the same HW format with different opcodes */ +struct mlx5_ifc_dr_action_hw_set_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x3]; + u8 destination_length[0x5]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_dr_action_hw_copy_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 reserved_at_20[0x8]; + u8 source_field_code[0x8]; + u8 reserved_at_30[0x2]; + u8 source_left_shifter[0x6]; + u8 reserved_at_38[0x8]; +}; + +enum { + MLX5DR_ASO_FLOW_METER_NUM_PER_OBJ = 2, +}; + +struct mlx5_ifc_ste_aso_flow_meter_action_bits { + u8 reserved_at_0[0xc]; + u8 action[0x1]; + u8 initial_color[0x2]; + u8 line_id[0x1]; +}; + +struct mlx5_ifc_ste_double_action_aso_v1_bits { + u8 action_id[0x8]; + u8 aso_context_number[0x18]; + + u8 dest_reg_id[0x2]; + u8 change_ordering_tag[0x1]; + u8 aso_check_ordering[0x1]; + u8 aso_context_type[0x4]; + u8 reserved_at_28[0x8]; + union { + u8 aso_fields[0x10]; + struct mlx5_ifc_ste_aso_flow_meter_action_bits flow_meter; + }; +}; + +#endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h new file mode 100644 index 0000000000..ca3b0f1453 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h @@ -0,0 +1,469 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */ + +#ifndef MLX5_IFC_DR_STE_V1_H +#define MLX5_IFC_DR_STE_V1_H + +enum mlx5_ifc_ste_v1_modify_hdr_offset { + MLX5_MODIFY_HEADER_V1_QW_OFFSET = 0x20, +}; + +struct mlx5_ifc_ste_single_action_flow_tag_v1_bits { + u8 action_id[0x8]; + u8 flow_tag[0x18]; +}; + +struct mlx5_ifc_ste_single_action_modify_list_v1_bits { + u8 action_id[0x8]; + u8 num_of_modify_actions[0x8]; + u8 modify_actions_ptr[0x10]; +}; + +struct mlx5_ifc_ste_single_action_remove_header_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 reserved_at_10[0x2]; + u8 end_anchor[0x6]; + u8 reserved_at_18[0x4]; + u8 decap[0x1]; + u8 vni_to_cqe[0x1]; + u8 qos_profile[0x2]; +}; + +struct mlx5_ifc_ste_single_action_remove_header_size_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 outer_l4_remove[0x1]; + u8 reserved_at_11[0x1]; + u8 start_offset[0x7]; + u8 reserved_at_18[0x1]; + u8 remove_size[0x6]; +}; + +struct mlx5_ifc_ste_double_action_copy_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_17[0x2]; + u8 destination_length[0x6]; + + u8 reserved_at_20[0x8]; + u8 source_dw_offset[0x8]; + u8 reserved_at_30[0x2]; + u8 source_right_shifter[0x6]; + u8 reserved_at_38[0x8]; +}; + +struct mlx5_ifc_ste_double_action_set_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_ste_double_action_add_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 add_value[0x20]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_inline_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 start_offset[0x7]; + u8 reserved_at_17[0x9]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_ptr_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 start_offset[0x7]; + u8 size[0x6]; + u8 attributes[0x3]; + + u8 pointer[0x20]; +}; + +struct mlx5_ifc_ste_double_action_accelerated_modify_action_list_v1_bits { + u8 action_id[0x8]; + u8 modify_actions_pattern_pointer[0x18]; + + u8 number_of_modify_actions[0x8]; + u8 modify_actions_argument_pointer[0x18]; +}; + +struct mlx5_ifc_ste_match_bwc_v1_bits { + u8 entry_format[0x8]; + u8 counter_id[0x18]; + + u8 miss_address_63_48[0x10]; + u8 match_definer_ctx_idx[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 reserved_at_5a[0x1]; + u8 match_polarity[0x1]; + u8 reparse[0x1]; + u8 reserved_at_5d[0x3]; + + u8 next_table_base_63_48[0x10]; + u8 hash_definer_ctx_idx[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 hash_type[0x2]; + u8 hash_after_actions[0x1]; + u8 reserved_at_9e[0x2]; + + u8 byte_mask[0x10]; + u8 next_entry_format[0x1]; + u8 mask_mode[0x1]; + u8 gvmi[0xe]; + + u8 action[0x40]; +}; + +struct mlx5_ifc_ste_mask_and_match_v1_bits { + u8 entry_format[0x8]; + u8 counter_id[0x18]; + + u8 miss_address_63_48[0x10]; + u8 match_definer_ctx_idx[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 reserved_at_5a[0x1]; + u8 match_polarity[0x1]; + u8 reparse[0x1]; + u8 reserved_at_5d[0x3]; + + u8 next_table_base_63_48[0x10]; + u8 hash_definer_ctx_idx[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 hash_type[0x2]; + u8 hash_after_actions[0x1]; + u8 reserved_at_9e[0x2]; + + u8 action[0x60]; +}; + +struct mlx5_ifc_ste_match_ranges_v1_bits { + u8 entry_format[0x8]; + u8 counter_id[0x18]; + + u8 miss_address_63_48[0x10]; + u8 match_definer_ctx_idx[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 reserved_at_5a[0x1]; + u8 match_polarity[0x1]; + u8 reparse[0x1]; + u8 reserved_at_5d[0x3]; + + u8 next_table_base_63_48[0x10]; + u8 hash_definer_ctx_idx[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 hash_type[0x2]; + u8 hash_after_actions[0x1]; + u8 reserved_at_9e[0x2]; + + u8 action[0x60]; + + u8 max_value_0[0x20]; + u8 min_value_0[0x20]; + u8 max_value_1[0x20]; + u8 min_value_1[0x20]; + u8 max_value_2[0x20]; + u8 min_value_2[0x20]; + u8 max_value_3[0x20]; + u8 min_value_3[0x20]; +}; + +struct mlx5_ifc_ste_eth_l2_src_v1_bits { + u8 reserved_at_0[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_loopback[0x1]; + u8 ip_fragmented[0x1]; + u8 qp_type[0x2]; + u8 encapsulation_type[0x2]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x6]; + u8 tcp_syn[0x1]; + u8 reserved_at_67[0x3]; + u8 force_loopback[0x1]; + u8 l2_ok[0x1]; + u8 l3_ok[0x1]; + u8 l4_ok[0x1]; + u8 second_vlan_qualifier[0x2]; + + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_dst_v1_bits { + u8 reserved_at_0[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 ip_fragmented[0x1]; + u8 qp_type[0x2]; + u8 encapsulation_type[0x2]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x6]; + u8 tcp_syn[0x1]; + u8 reserved_at_67[0x3]; + u8 force_lb[0x1]; + u8 l2_ok[0x1]; + u8 l3_ok[0x1]; + u8 l4_ok[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_src_dst_v1_bits { + u8 dmac_47_16[0x20]; + + u8 smac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 reserved_at_50[0x2]; + u8 functional_lb[0x1]; + u8 reserved_at_53[0x5]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 reserved_at_5c[0x2]; + u8 first_vlan_qualifier[0x2]; + + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + u8 smac_15_0[0x10]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_v1_bits { + u8 source_address[0x20]; + + u8 destination_address[0x20]; + + u8 source_port[0x10]; + u8 destination_port[0x10]; + + u8 reserved_at_60[0x4]; + u8 l4_ok[0x1]; + u8 l3_ok[0x1]; + u8 fragmented[0x1]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 dscp[0x6]; + u8 ecn[0x2]; + u8 protocol[0x8]; +}; + +struct mlx5_ifc_ste_eth_l2_tnl_v1_bits { + u8 l2_tunneling_network_id[0x20]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x3]; + u8 ip_fragmented[0x1]; + u8 reserved_at_64[0x2]; + u8 encp_type[0x2]; + u8 reserved_at_68[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_misc_v1_bits { + u8 identification[0x10]; + u8 flags[0x3]; + u8 fragment_offset[0xd]; + + u8 total_length[0x10]; + u8 checksum[0x10]; + + u8 version[0x4]; + u8 ihl[0x4]; + u8 time_to_live[0x8]; + u8 reserved_at_50[0x10]; + + u8 reserved_at_60[0x1c]; + u8 voq_internal_prio[0x4]; +}; + +struct mlx5_ifc_ste_eth_l4_v1_bits { + u8 ipv6_version[0x4]; + u8 reserved_at_4[0x4]; + u8 dscp[0x6]; + u8 ecn[0x2]; + u8 ipv6_hop_limit[0x8]; + u8 protocol[0x8]; + + u8 src_port[0x10]; + u8 dst_port[0x10]; + + u8 first_fragment[0x1]; + u8 reserved_at_41[0xb]; + u8 flow_label[0x14]; + + u8 tcp_data_offset[0x4]; + u8 l4_ok[0x1]; + u8 l3_ok[0x1]; + u8 fragmented[0x1]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 ipv6_paylen[0x10]; +}; + +struct mlx5_ifc_ste_eth_l4_misc_v1_bits { + u8 window_size[0x10]; + u8 urgent_pointer[0x10]; + + u8 ack_num[0x20]; + + u8 seq_num[0x20]; + + u8 length[0x10]; + u8 checksum[0x10]; +}; + +struct mlx5_ifc_ste_mpls_v1_bits { + u8 reserved_at_0[0x15]; + u8 mpls_ok[0x1]; + u8 mpls4_s_bit[0x1]; + u8 mpls4_qualifier[0x1]; + u8 mpls3_s_bit[0x1]; + u8 mpls3_qualifier[0x1]; + u8 mpls2_s_bit[0x1]; + u8 mpls2_qualifier[0x1]; + u8 mpls1_s_bit[0x1]; + u8 mpls1_qualifier[0x1]; + u8 mpls0_s_bit[0x1]; + u8 mpls0_qualifier[0x1]; + + u8 mpls0_label[0x14]; + u8 mpls0_exp[0x3]; + u8 mpls0_s_bos[0x1]; + u8 mpls0_ttl[0x8]; + + u8 mpls1_label[0x20]; + + u8 mpls2_label[0x20]; +}; + +struct mlx5_ifc_ste_gre_v1_bits { + u8 gre_c_present[0x1]; + u8 reserved_at_1[0x1]; + u8 gre_k_present[0x1]; + u8 gre_s_present[0x1]; + u8 strict_src_route[0x1]; + u8 recur[0x3]; + u8 flags[0x5]; + u8 version[0x3]; + u8 gre_protocol[0x10]; + + u8 reserved_at_20[0x20]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_src_gvmi_qp_v1_bits { + u8 loopback_synd[0x8]; + u8 reserved_at_8[0x7]; + u8 functional_lb[0x1]; + u8 source_gvmi[0x10]; + + u8 force_lb[0x1]; + u8 reserved_at_21[0x1]; + u8 source_is_requestor[0x1]; + u8 reserved_at_23[0x5]; + u8 source_qp[0x18]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_icmp_v1_bits { + u8 icmp_payload_data[0x20]; + + u8 icmp_header_data[0x20]; + + u8 icmp_type[0x8]; + u8 icmp_code[0x8]; + u8 reserved_at_50[0x10]; + + u8 reserved_at_60[0x20]; +}; + +#endif /* MLX5_IFC_DR_STE_V1_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h new file mode 100644 index 0000000000..89fced8693 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef _MLX5DR_H_ +#define _MLX5DR_H_ + +struct mlx5dr_domain; +struct mlx5dr_table; +struct mlx5dr_matcher; +struct mlx5dr_rule; +struct mlx5dr_action; + +enum mlx5dr_domain_type { + MLX5DR_DOMAIN_TYPE_NIC_RX, + MLX5DR_DOMAIN_TYPE_NIC_TX, + MLX5DR_DOMAIN_TYPE_FDB, +}; + +enum mlx5dr_domain_sync_flags { + MLX5DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0, + MLX5DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1, +}; + +enum mlx5dr_action_reformat_type { + DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2, + DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2, + DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3, + DR_ACTION_REFORMAT_TYP_INSERT_HDR, + DR_ACTION_REFORMAT_TYP_REMOVE_HDR, +}; + +struct mlx5dr_match_parameters { + size_t match_sz; + u64 *match_buf; /* Device spec format */ +}; + +struct mlx5dr_action_dest { + struct mlx5dr_action *dest; + struct mlx5dr_action *reformat; +}; + +struct mlx5dr_domain * +mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); + +int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); + +int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); + +void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, + struct mlx5dr_domain *peer_dmn, + u16 peer_vhca_id); + +struct mlx5dr_table * +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags, + u16 uid); + +struct mlx5dr_table * +mlx5dr_table_get_from_fs_ft(struct mlx5_flow_table *ft); + +int mlx5dr_table_destroy(struct mlx5dr_table *table); + +u32 mlx5dr_table_get_id(struct mlx5dr_table *table); + +struct mlx5dr_matcher * +mlx5dr_matcher_create(struct mlx5dr_table *table, + u32 priority, + u8 match_criteria_enable, + struct mlx5dr_match_parameters *mask); + +int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher); + +struct mlx5dr_rule * +mlx5dr_rule_create(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *value, + size_t num_actions, + struct mlx5dr_action *actions[], + u32 flow_source); + +int mlx5dr_rule_destroy(struct mlx5dr_rule *rule); + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action); + +struct mlx5dr_action * +mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num); + +struct mlx5dr_action * +mlx5dr_action_create_dest_table(struct mlx5dr_table *table); + +struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, + struct mlx5_flow_table *ft); + +struct mlx5dr_action * +mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, + u16 vport, u8 vhca_id_valid, + u16 vhca_id); + +struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests, + bool ignore_flow_level, + u32 flow_source); + +struct mlx5dr_action *mlx5dr_action_create_drop(void); + +struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value); + +struct mlx5dr_action * +mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id); + +struct mlx5dr_action * +mlx5dr_action_create_flow_counter(u32 counter_id); + +struct mlx5dr_action * +mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, + enum mlx5dr_action_reformat_type reformat_type, + u8 reformat_param_0, + u8 reformat_param_1, + size_t data_sz, + void *data); + +struct mlx5dr_action * +mlx5dr_action_create_modify_header(struct mlx5dr_domain *domain, + u32 flags, + size_t actions_sz, + __be64 actions[]); + +struct mlx5dr_action *mlx5dr_action_create_pop_vlan(void); + +struct mlx5dr_action * +mlx5dr_action_create_push_vlan(struct mlx5dr_domain *domain, __be32 vlan_hdr); + +struct mlx5dr_action * +mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, + u32 obj_id, + u8 return_reg_id, + u8 aso_type, + u8 init_color, + u8 meter_id); + +struct mlx5dr_action * +mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn, + u32 field, + struct mlx5_flow_table *hit_ft, + struct mlx5_flow_table *miss_ft, + u32 min, + u32 max); + +int mlx5dr_action_destroy(struct mlx5dr_action *action); + +u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action); + +int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id, + u8 *dw_selectors, u8 *byte_selectors, + u8 *match_mask, u32 *definer_id); +void mlx5dr_definer_put(struct mlx5dr_domain *dmn, u32 definer_id); + +static inline bool +mlx5dr_is_supported(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, roce) && + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && + (MLX5_CAP_GEN(dev, steering_format_version) <= + MLX5_STEERING_FORMAT_CONNECTX_7))); +} + +/* buddy functions & structure */ + +struct mlx5dr_icm_mr; + +struct mlx5dr_icm_buddy_mem { + unsigned long **bitmap; + unsigned int *num_free; + u32 max_order; + struct list_head list_node; + struct mlx5dr_icm_mr *icm_mr; + struct mlx5dr_icm_pool *pool; + + /* Amount of memory in used chunks - HW may be accessing this memory */ + u64 used_memory; + + /* Memory optimisation */ + struct mlx5dr_ste *ste_arr; + struct list_head *miss_list; + u8 *hw_ste_arr; +}; + +int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int max_order); +void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy); +int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int order, + unsigned int *segment); +void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int seg, unsigned int order); + +#endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c new file mode 100644 index 0000000000..b6931bbe52 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -0,0 +1,505 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "mlx5_core.h" +#include + +int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {}; + int err; + + MLX5_SET(alloc_transport_domain_in, in, opcode, + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + + err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out); + if (!err) + *tdn = MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + + return err; +} +EXPORT_SYMBOL(mlx5_core_alloc_transport_domain); + +void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {}; + + MLX5_SET(dealloc_transport_domain_in, in, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); + mlx5_cmd_exec_in(dev, dealloc_transport_domain, in); +} +EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain); + +int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) +{ + u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {}; + int err; + + MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqn = MLX5_GET(create_rq_out, out, rqn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_rq); + +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in) +{ + MLX5_SET(modify_rq_in, in, rqn, rqn); + MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ); + + return mlx5_cmd_exec_in(dev, modify_rq, in); +} +EXPORT_SYMBOL(mlx5_core_modify_rq); + +void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {}; + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + mlx5_cmd_exec_in(dev, destroy_rq, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_rq); + +int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {}; + + MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ); + MLX5_SET(query_rq_in, in, rqn, rqn); + + return mlx5_cmd_exec_inout(dev, query_rq, in, out); +} +EXPORT_SYMBOL(mlx5_core_query_rq); + +int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) +{ + u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {}; + int err; + + MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *sqn = MLX5_GET(create_sq_out, out, sqn); + + return err; +} + +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in) +{ + MLX5_SET(modify_sq_in, in, sqn, sqn); + MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); + return mlx5_cmd_exec_in(dev, modify_sq, in); +} +EXPORT_SYMBOL(mlx5_core_modify_sq); + +void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {}; + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + mlx5_cmd_exec_in(dev, destroy_sq, in); +} + +int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {}; + + MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ); + MLX5_SET(query_sq_in, in, sqn, sqn); + return mlx5_cmd_exec_inout(dev, query_sq, in, out); +} +EXPORT_SYMBOL(mlx5_core_query_sq); + +int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state) +{ + void *out; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(query_sq_out); + out = kvzalloc(inlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_sq(dev, sqn, out); + if (err) + goto out; + + sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context); + *state = MLX5_GET(sqc, sqc, state); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state); + +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + err = mlx5_cmd_exec_inout(dev, create_tir, in, out); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_tir); + +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in) +{ + MLX5_SET(modify_tir_in, in, tirn, tirn); + MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR); + return mlx5_cmd_exec_in(dev, modify_tir, in); +} + +void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + mlx5_cmd_exec_in(dev, destroy_tir, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_tir); + +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {}; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + err = mlx5_cmd_exec_inout(dev, create_tis, in, out); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_tis); + +int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in) +{ + MLX5_SET(modify_tis_in, in, tisn, tisn); + MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS); + + return mlx5_cmd_exec_in(dev, modify_tis, in); +} +EXPORT_SYMBOL(mlx5_core_modify_tis); + +void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + mlx5_cmd_exec_in(dev, destroy_tis, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_tis); + +int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rqtn) +{ + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {}; + int err; + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_rqt); + +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {}; + + MLX5_SET(modify_rqt_in, in, rqtn, rqtn); + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); + mlx5_cmd_exec_in(dev, destroy_rqt, in); +} +EXPORT_SYMBOL(mlx5_core_destroy_rqt); + +static int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, u32 *rqn) +{ + u32 in[MLX5_ST_SZ_DW(create_rq_in)] = {0}; + void *rqc, *wq; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(rqc, rqc, hairpin, 1); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, counter_set_id, params->q_counter); + + MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); + + return mlx5_core_create_rq(mdev, in, MLX5_ST_SZ_BYTES(create_rq_in), rqn); +} + +static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, u32 *sqn) +{ + u32 in[MLX5_ST_SZ_DW(create_sq_in)] = {0}; + void *sqc, *wq; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(sqc, sqc, hairpin, 1); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + + MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); + + return mlx5_core_create_sq(mdev, in, MLX5_ST_SZ_BYTES(create_sq_in), sqn); +} + +static int mlx5_hairpin_create_queues(struct mlx5_hairpin *hp, + struct mlx5_hairpin_params *params) +{ + int i, j, err; + + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn[i]); + if (err) + goto out_err_rq; + } + + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn[i]); + if (err) + goto out_err_sq; + } + + return 0; + +out_err_sq: + for (j = 0; j < i; j++) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[j]); + i = hp->num_channels; +out_err_rq: + for (j = 0; j < i; j++) + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[j]); + return err; +} + +static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp) +{ + int i; + + for (i = 0; i < hp->num_channels; i++) { + mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]); + if (!hp->peer_gone) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + } +} + +static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn, + int curr_state, int next_state, + u16 peer_vhca, u32 peer_sq) +{ + u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {}; + void *rqc; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + if (next_state == MLX5_RQC_STATE_RDY) { + MLX5_SET(rqc, rqc, hairpin_peer_sq, peer_sq); + MLX5_SET(rqc, rqc, hairpin_peer_vhca, peer_vhca); + } + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + return mlx5_core_modify_rq(func_mdev, rqn, in); +} + +static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn, + int curr_state, int next_state, + u16 peer_vhca, u32 peer_rq) +{ + u32 in[MLX5_ST_SZ_DW(modify_sq_in)] = {0}; + void *sqc; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + if (next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq); + MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca); + } + + MLX5_SET(modify_sq_in, in, sq_state, curr_state); + MLX5_SET(sqc, sqc, state, next_state); + + return mlx5_core_modify_sq(peer_mdev, sqn, in); +} + +static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp) +{ + int i, j, err; + + /* set peer SQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], + MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn[i]); + if (err) + goto err_modify_sq; + } + + /* set func RQs */ + for (i = 0; i < hp->num_channels; i++) { + err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], + MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY, + MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn[i]); + if (err) + goto err_modify_rq; + } + + return 0; + +err_modify_rq: + for (j = 0; j < i; j++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[j], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + i = hp->num_channels; +err_modify_sq: + for (j = 0; j < i; j++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[j], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); + return err; +} + +static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp) +{ + int i; + + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); +} + +static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) +{ + int i; + + /* unset func RQs */ + for (i = 0; i < hp->num_channels; i++) + mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); + /* unset peer SQs */ + if (!hp->peer_gone) + mlx5_hairpin_unpair_peer_sq(hp); +} + +struct mlx5_hairpin * +mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev, + struct mlx5_core_dev *peer_mdev, + struct mlx5_hairpin_params *params) +{ + struct mlx5_hairpin *hp; + int size, err; + + size = sizeof(*hp) + params->num_channels * 2 * sizeof(u32); + hp = kzalloc(size, GFP_KERNEL); + if (!hp) + return ERR_PTR(-ENOMEM); + + hp->func_mdev = func_mdev; + hp->peer_mdev = peer_mdev; + hp->num_channels = params->num_channels; + + hp->rqn = (void *)hp + sizeof(*hp); + hp->sqn = hp->rqn + params->num_channels; + + /* alloc and pair func --> peer hairpin */ + err = mlx5_hairpin_create_queues(hp, params); + if (err) + goto err_create_queues; + + err = mlx5_hairpin_pair_queues(hp); + if (err) + goto err_pair_queues; + + return hp; + +err_pair_queues: + mlx5_hairpin_destroy_queues(hp); +err_create_queues: + kfree(hp); + return ERR_PTR(err); +} + +void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp) +{ + mlx5_hairpin_unpair_queues(hp); + mlx5_hairpin_destroy_queues(hp); + kfree(hp); +} + +void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp) +{ + int i; + + mlx5_hairpin_unpair_peer_sq(hp); + + /* destroy peer SQ */ + for (i = 0; i < hp->num_channels; i++) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + + hp->peer_gone = true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c new file mode 100644 index 0000000000..1513112ece --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_core.h" + +static int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {}; + int err; + + MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR); + err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out); + if (err) + return err; + + *uarn = MLX5_GET(alloc_uar_out, out, uar); + return 0; +} + +static int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {}; + + MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR); + MLX5_SET(dealloc_uar_in, in, uar, uarn); + return mlx5_cmd_exec_in(dev, dealloc_uar, in); +} + +static int uars_per_sys_page(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, uar_4k)) + return MLX5_CAP_GEN(mdev, num_of_uars_per_page); + + return 1; +} + +static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index) +{ + u32 system_page_index; + + if (MLX5_CAP_GEN(mdev, uar_4k)) + system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT); + else + system_page_index = index; + + return (mdev->bar_addr >> PAGE_SHIFT) + system_page_index; +} + +static void up_rel_func(struct kref *kref) +{ + struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); + + list_del(&up->list); + iounmap(up->map); + if (mlx5_cmd_free_uar(up->mdev, up->index)) + mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); + bitmap_free(up->reg_bitmap); + bitmap_free(up->fp_bitmap); + kfree(up); +} + +static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev, + bool map_wc) +{ + struct mlx5_uars_page *up; + int err = -ENOMEM; + phys_addr_t pfn; + int bfregs; + int node; + int i; + + bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR; + node = mdev->priv.numa_node; + up = kzalloc_node(sizeof(*up), GFP_KERNEL, node); + if (!up) + return ERR_PTR(err); + + up->mdev = mdev; + up->reg_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node); + if (!up->reg_bitmap) + goto error1; + + up->fp_bitmap = bitmap_zalloc_node(bfregs, GFP_KERNEL, node); + if (!up->fp_bitmap) + goto error1; + + for (i = 0; i < bfregs; i++) + if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR) + set_bit(i, up->reg_bitmap); + else + set_bit(i, up->fp_bitmap); + + up->bfregs = bfregs; + up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + + err = mlx5_cmd_alloc_uar(mdev, &up->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + goto error1; + } + + pfn = uar2pfn(mdev, up->index); + if (map_wc) { + up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -EAGAIN; + goto error2; + } + } else { + up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -ENOMEM; + goto error2; + } + } + kref_init(&up->ref_count); + mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n", + up->index, up->bfregs); + return up; + +error2: + if (mlx5_cmd_free_uar(mdev, up->index)) + mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index); +error1: + bitmap_free(up->fp_bitmap); + bitmap_free(up->reg_bitmap); + kfree(up); + return ERR_PTR(err); +} + +struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_uars_page *ret; + + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + if (!list_empty(&mdev->priv.bfregs.reg_head.list)) { + ret = list_first_entry(&mdev->priv.bfregs.reg_head.list, + struct mlx5_uars_page, list); + kref_get(&ret->ref_count); + goto out; + } + ret = alloc_uars_page(mdev, false); + if (IS_ERR(ret)) + goto out; + list_add(&ret->list, &mdev->priv.bfregs.reg_head.list); +out: + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); + + return ret; +} +EXPORT_SYMBOL(mlx5_get_uars_page); + +void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up) +{ + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); +} +EXPORT_SYMBOL(mlx5_put_uars_page); + +static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi) +{ + /* return the offset in bytes from the start of the page to the + * blue flame area of the UAR + */ + return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE + + (dbi % MLX5_BFREGS_PER_UAR) * + (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET; +} + +static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct list_head *head; + unsigned long *bitmap; + unsigned int *avail; + struct mutex *lock; /* pointer to right mutex */ + int dbi; + + bfregs = &mdev->priv.bfregs; + if (map_wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + mutex_lock(lock); + if (list_empty(head)) { + up = alloc_uars_page(mdev, map_wc); + if (IS_ERR(up)) { + mutex_unlock(lock); + return PTR_ERR(up); + } + list_add(&up->list, head); + } else { + up = list_entry(head->next, struct mlx5_uars_page, list); + kref_get(&up->ref_count); + } + if (fast_path) { + bitmap = up->fp_bitmap; + avail = &up->fp_avail; + } else { + bitmap = up->reg_bitmap; + avail = &up->reg_avail; + } + dbi = find_first_bit(bitmap, up->bfregs); + clear_bit(dbi, bitmap); + (*avail)--; + if (!(*avail)) + list_del(&up->list); + + bfreg->map = up->map + map_offset(mdev, dbi); + bfreg->up = up; + bfreg->wc = map_wc; + bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR; + mutex_unlock(lock); + + return 0; +} + +int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + int err; + + err = alloc_bfreg(mdev, bfreg, map_wc, fast_path); + if (!err) + return 0; + + if (err == -EAGAIN && map_wc) + return alloc_bfreg(mdev, bfreg, false, fast_path); + + return err; +} +EXPORT_SYMBOL(mlx5_alloc_bfreg); + +static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev, + struct mlx5_uars_page *up, + struct mlx5_sq_bfreg *bfreg) +{ + unsigned int uar_idx; + unsigned int bfreg_idx; + unsigned int bf_reg_size; + + bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size); + + uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT; + bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size; + + return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx; +} + +void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct mutex *lock; /* pointer to right mutex */ + unsigned int dbi; + bool fp; + unsigned int *avail; + unsigned long *bitmap; + struct list_head *head; + + bfregs = &mdev->priv.bfregs; + if (bfreg->wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + up = bfreg->up; + dbi = addr_to_dbi_in_syspage(mdev, up, bfreg); + fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR; + if (fp) { + avail = &up->fp_avail; + bitmap = up->fp_bitmap; + } else { + avail = &up->reg_avail; + bitmap = up->reg_bitmap; + } + mutex_lock(lock); + (*avail)++; + set_bit(dbi, bitmap); + if (*avail == 1) + list_add_tail(&up->list, head); + + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(lock); +} +EXPORT_SYMBOL(mlx5_free_bfreg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c new file mode 100644 index 0000000000..21753f3278 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -0,0 +1,1205 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "sf/sf.h" + +/* Mutex to hold while enabling or disabling RoCE */ +static DEFINE_MUTEX(mlx5_roce_en_lock); + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; + int err; + + MLX5_SET(query_vport_state_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); + + err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); + if (err) + return 0; + + return MLX5_GET(query_vport_state_out, out, state); +} + +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 other_vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {}; + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + MLX5_SET(modify_vport_state_in, in, other_vport, other_vport); + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + return mlx5_cmd_exec_in(mdev, modify_vport_state, in); +} + +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out); +} + +int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 *min_inline) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {}; + int err; + + err = mlx5_query_nic_vport_context(mdev, vport, out); + if (!err) + *min_inline = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.min_wqe_inline_mode); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); + +void mlx5_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + if (!mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode)) + break; + fallthrough; + case MLX5_CAP_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + break; + } +} +EXPORT_SYMBOL_GPL(mlx5_query_min_inline); + +int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 min_inline) +{ + u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {}; + void *nic_vport_ctx; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.min_inline, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET(nic_vport_context, nic_vport_ctx, + min_wqe_inline_mode, min_inline); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + return mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); +} + +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, bool other, u8 *addr) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; + u8 *out_addr; + int err; + + out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context.permanent_address); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, other); + + err = mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out); + if (!err) + ether_addr_copy(addr, &out_addr[2]); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr); +} +EXPORT_SYMBOL_GPL(mlx5_query_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, const u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address); + +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (!err) + *mtu = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.mtu); + + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mtu); + +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u16 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_out) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + out = kvzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport || mlx5_core_is_ecpf(dev)) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {}; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); + +int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, + u64 *system_image_guid) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; + + *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out, + nic_vport_context.system_image_guid); +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid); + +int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, 0, out); + + *node_guid = MLX5_GET64(query_nic_vport_context_out, out, + nic_vport_context.node_guid); + + kvfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); + +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u16 vport, u64 node_guid) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_context; + void *in; + int err; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EACCES; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.node_guid, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + + return err; +} + +int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, + u16 *qkey_viol_cntr) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + mlx5_query_nic_vport_context(mdev, 0, out); + + *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.qkey_violation_counter); + + kvfree(out); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr); + +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + union ib_gid *tmp; + int tbsz; + int nout; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size)); + mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n", + vf_num, gid_index, tbsz); + + if (gid_index > tbsz && gid_index != 0xffff) + return -EINVAL; + + if (gid_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * sizeof(*gid); + + in = kvzalloc(in_sz, GFP_KERNEL); + out = kvzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + gid->global.subnet_prefix = tmp->global.subnet_prefix; + gid->global.interface_id = tmp->global.interface_id; + +out: + kvfree(in); + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid); + +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + void *pkarr; + int nout; + int tbsz; + int err; + int i; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)); + if (pkey_index > tbsz && pkey_index != 0xffff) + return -EINVAL; + + if (pkey_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * MLX5_ST_SZ_BYTES(pkey); + + in = kvzalloc(in_sz, GFP_KERNEL); + out = kvzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey); + for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey)) + *pkey = MLX5_GET_PR(pkey, pkarr, pkey); + +out: + kvfree(in); + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey); + +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep) +{ + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); + int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {}; + int is_group_manager; + void *out; + void *ctx; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + out = kvzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT); + + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_context_in, in, port_num, port_num); + + err = mlx5_cmd_exec_inout(dev, query_hca_vport_context, in, out); + if (err) + goto ex; + + ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context); + rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select); + rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware); + rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi); + rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw); + rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy); + rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state); + rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid); + rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid); + rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1); + rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask1_field_select); + rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2); + rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask2_field_select); + rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid); + rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx, + init_type_reply); + rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc); + rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx, + subnet_timeout); + rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid); + rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl); + rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + qkey_violation_counter); + rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + pkey_violation_counter); + rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required); + rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx, + system_image_guid); + +ex: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); + +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + u64 *sys_image_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kvzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *sys_image_guid = rep->sys_image_guid; + + kvfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid); + +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kvzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *node_guid = rep->node_guid; + + kvfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u16 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); + +enum { + UC_LOCAL_LB, + MC_LOCAL_LB +}; + +int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) && + !MLX5_CAP_GEN(mdev, disable_local_lb_uc)) + return 0; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_mc_local_lb, !enable); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_uc_local_lb, !enable); + + if (MLX5_CAP_GEN(mdev, disable_local_lb_mc)) + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_mc_local_lb, 1); + + if (MLX5_CAP_GEN(mdev, disable_local_lb_uc)) + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_uc_local_lb, 1); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + if (!err) + mlx5_core_dbg(mdev, "%s local_lb\n", + enable ? "enable" : "disable"); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb); + +int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int value; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; + + value = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB; + + value |= MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB; + + *status = !value; + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb); + +enum mlx5_vport_roce_state { + MLX5_VPORT_ROCE_DISABLED = 0, + MLX5_VPORT_ROCE_ENABLED = 1, +}; + +static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, + enum mlx5_vport_roce_state state) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en, + state); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + + return err; +} + +int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) +{ + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (!mdev->roce.roce_en) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); + + if (!err) + mdev->roce.roce_en++; + mutex_unlock(&mlx5_roce_en_lock); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); + +int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) +{ + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (mdev->roce.roce_en) { + mdev->roce.roce_en--; + if (mdev->roce.roce_en == 0) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); + + if (err) + mdev->roce.roce_en++; + } + mutex_unlock(&mlx5_roce_en_lock); + return err; +} +EXPORT_SYMBOL(mlx5_nic_vport_disable_roce); + +int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, + int vf, u8 port_num, void *out) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); + int is_group_manager; + void *in; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1); + } else { + err = -EPERM; + goto free; + } + } + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_vport_counter_in, in, port_num, port_num); + + err = mlx5_cmd_exec_inout(dev, query_vport_counter, in, out); +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); + +int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, + u8 other_vport, u64 *rx_discard_vport_down, + u64 *tx_discard_vport_down) +{ + u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, + MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, op_mod, 0); + MLX5_SET(query_vnic_env_in, in, vport_number, vport); + MLX5_SET(query_vnic_env_in, in, other_vport, other_vport); + + err = mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out); + if (err) + return err; + + *rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out, + vport_env.receive_discard_vport_down); + *tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out, + vport_env.transmit_discard_vport_down); + return 0; +} + +int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + int vf, + struct mlx5_hca_vport_context *req) +{ + int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in); + int is_group_manager; + void *ctx; + void *in; + int err; + + mlx5_core_dbg(dev, "vf %d\n", vf); + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + in = kvzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) > 1) + MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num); + + ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context); + MLX5_SET(hca_vport_context, ctx, field_select, req->field_select); + if (req->field_select & MLX5_HCA_VPORT_SEL_STATE_POLICY) + MLX5_SET(hca_vport_context, ctx, vport_state_policy, + req->policy); + if (req->field_select & MLX5_HCA_VPORT_SEL_PORT_GUID) + MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid); + if (req->field_select & MLX5_HCA_VPORT_SEL_NODE_GUID) + MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid); + MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1); + MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, + req->cap_mask1_perm); + err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in); +ex: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); + +int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, + struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = mlx5_nic_vport_enable_roce(port_mdev); + if (err) + goto free; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) { + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN_2(master_mdev, sw_vhca_id)); + } else { + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN(master_mdev, vhca_id)); + } + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, + MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in); + if (err) + mlx5_nic_vport_disable_roce(port_mdev); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport); + +int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, 0); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, 0); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in); + if (!err) + mlx5_nic_vport_disable_roce(port_mdev); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); + +u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) +{ + int port_type_cap = MLX5_CAP_GEN(mdev, port_type); + u64 tmp; + int err; + + if (mdev->sys_image_guid) + return mdev->sys_image_guid; + + if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH) + err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + else + err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + + mdev->sys_image_guid = err ? 0 : tmp; + + return mdev->sys_image_guid; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); + +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, + u16 opmod) +{ + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + + opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(dev, vport, ec_vf_func)); + MLX5_SET(query_hca_cap_in, in, other_function, true); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); + return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); +} +EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); + +int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, + u16 vport, u16 opmod) +{ + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_hca_cap; + void *set_ctx; + int ret; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + MLX5_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1); + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap)); + MLX5_SET(set_hca_cap_in, set_ctx, function_id, + mlx5_vport_to_func_id(dev, vport, ec_vf_func)); + MLX5_SET(set_hca_cap_in, set_ctx, other_function, true); + MLX5_SET(set_hca_cap_in, set_ctx, ec_vf_function, ec_vf_func); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx); + + kfree(set_ctx); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c new file mode 100644 index 0000000000..3091dd0146 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "wq.h" +#include "mlx5_core.h" + +int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); + struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; + int err; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); + wq->sz = mlx5_wq_cyc_get_size(wq); + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides) +{ + size_t len; + void *wqe; + + if (!net_ratelimit()) + return; + + nstrides = max_t(u8, nstrides, 1); + + len = nstrides << wq->fbc.log_stride; + wqe = mlx5_wq_cyc_get_wqe(wq, ix); + + pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %zu\n", + mlx5_wq_cyc_get_size(wq), wq->cur_sz, ix, len); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); +} + +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_cyc_update_db_record(wq); +} + +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4; + u8 log_rq_sz = MLX5_GET(qpc, qpc, log_rq_size); + u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB); + u8 log_sq_sz = MLX5_GET(qpc, qpc, log_sq_size); + + u32 rq_byte_size; + int err; + + + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + err = mlx5_frag_buf_alloc_node(mdev, + wq_get_byte_sz(log_rq_sz, log_rq_stride) + + wq_get_byte_sz(log_sq_sz, log_sq_stride), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc); + + rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride); + + if (rq_byte_size < PAGE_SIZE) { + /* SQ starts within the same page of the RQ */ + u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB; + + mlx5_init_fbc_offset(wq_ctrl->buf.frags, + log_sq_stride, log_sq_sz, sq_strides_offset, + &wq->sq.fbc); + } else { + u16 rq_npages = rq_byte_size >> PAGE_SHIFT; + + mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages, + log_sq_stride, log_sq_sz, &wq->sq.fbc); + } + + wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; + wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *cqc, struct mlx5_cqwq *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */ + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7; + u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); + int err; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), + &wq_ctrl->buf, + param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", + err); + goto err_db_free; + } + + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc); + + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq) +{ + struct mlx5_wqe_srq_next_seg *next_seg; + int i; + + for (i = 0; i < wq->fbc.sz_m1; i++) { + next_seg = mlx5_wq_ll_get_wqe(wq, i); + next_seg->next_wqe_index = cpu_to_be16(i + 1); + } + next_seg = mlx5_wq_ll_get_wqe(wq, i); + wq->tail_next = &next_seg->next_wqe_index; +} + +int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_ll *wq, + struct mlx5_wq_ctrl *wq_ctrl) +{ + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); + struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; + int err; + + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); + return err; + } + + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), + &wq_ctrl->buf, param->buf_numa_node); + if (err) { + mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); + goto err_db_free; + } + + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); + + mlx5_wq_ll_init_list(wq); + wq_ctrl->mdev = mdev; + + return 0; + +err_db_free: + mlx5_db_free(mdev, &wq_ctrl->db); + + return err; +} + +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq) +{ + wq->head = 0; + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_ll_init_list(wq); + mlx5_wq_ll_update_db_record(wq); +} + +void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl) +{ + mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf); + mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h new file mode 100644 index 0000000000..e4ef1d24a3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_WQ_H__ +#define __MLX5_WQ_H__ + +#include +#include +#include + +struct mlx5_wq_param { + int buf_numa_node; + int db_numa_node; +}; + +struct mlx5_wq_ctrl { + struct mlx5_core_dev *mdev; + struct mlx5_frag_buf buf; + struct mlx5_db db; +}; + +struct mlx5_wq_cyc { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + u16 sz; + u16 wqe_ctr; + u16 cur_sz; +}; + +struct mlx5_wq_qp { + struct mlx5_wq_cyc rq; + struct mlx5_wq_cyc sq; +}; + +struct mlx5_cqwq { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + u32 cc; /* consumer counter */ +}; + +struct mlx5_wq_ll { + struct mlx5_frag_buf_ctrl fbc; + __be32 *db; + __be16 *tail_next; + u16 head; + u16 wqe_ctr; + u16 cur_sz; +}; + +int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl); +void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides); +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq); + +int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, + struct mlx5_wq_ctrl *wq_ctrl); + +int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *cqc, struct mlx5_cqwq *wq, + struct mlx5_wq_ctrl *wq_ctrl); + +int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *wqc, struct mlx5_wq_ll *wq, + struct mlx5_wq_ctrl *wq_ctrl); +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq); + +void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); + +static inline u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) +{ + return (u32)wq->fbc.sz_m1 + 1; +} + +static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq) +{ + return wq->cur_sz == wq->sz; +} + +static inline int mlx5_wq_cyc_missing(struct mlx5_wq_cyc *wq) +{ + return wq->sz - wq->cur_sz; +} + +static inline int mlx5_wq_cyc_is_empty(struct mlx5_wq_cyc *wq) +{ + return !wq->cur_sz; +} + +static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr++; + wq->cur_sz++; +} + +static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u16 n) +{ + wq->wqe_ctr += n; + wq->cur_sz += n; +} + +static inline void mlx5_wq_cyc_pop(struct mlx5_wq_cyc *wq) +{ + wq->cur_sz--; +} + +static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq) +{ + *wq->db = cpu_to_be32(wq->wqe_ctr); +} + +static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) +{ + return ctr & wq->fbc.sz_m1; +} + +static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr); +} + +static inline u16 mlx5_wq_cyc_get_tail(struct mlx5_wq_cyc *wq) +{ + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr - wq->cur_sz); +} + +static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); +} + +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1; +} + +static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) +{ + int equal = (cc1 == cc2); + int smaller = 0x8000 & (cc1 - cc2); + + return !equal && !smaller; +} + +static inline u16 mlx5_wq_cyc_get_counter(struct mlx5_wq_cyc *wq) +{ + return wq->wqe_ctr; +} + +static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) +{ + return wq->fbc.sz_m1 + 1; +} + +static inline u8 mlx5_cqwq_get_log_stride_size(struct mlx5_cqwq *wq) +{ + return wq->fbc.log_stride; +} + +static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr) +{ + return ctr & wq->fbc.sz_m1; +} + +static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) +{ + return mlx5_cqwq_ctr2ix(wq, wq->cc); +} + +static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) +{ + struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix); + + /* For 128B CQEs the data is in the last 64B */ + cqe += wq->fbc.log_stride == 7; + + return cqe; +} + +static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr) +{ + return ctr >> wq->fbc.log_sz; +} + +static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq) +{ + return mlx5_cqwq_get_ctr_wrap_cnt(wq, wq->cc); +} + +static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq) +{ + wq->cc++; +} + +static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq) +{ + *wq->db = cpu_to_be32(wq->cc & 0xffffff); +} + +static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq) +{ + u32 ci = mlx5_cqwq_get_ci(wq); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); + u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + + if (cqe_ownership_bit != sw_ownership_val) + return NULL; + + /* ensure cqe content is read after cqe ownership bit */ + dma_rmb(); + + return cqe; +} + +static inline +struct mlx5_cqe64 *mlx5_cqwq_get_cqe_enahnced_comp(struct mlx5_cqwq *wq) +{ + u8 sw_validity_iteration_count = mlx5_cqwq_get_wrap_cnt(wq) & 0xff; + u32 ci = mlx5_cqwq_get_ci(wq); + struct mlx5_cqe64 *cqe; + + cqe = mlx5_cqwq_get_wqe(wq, ci); + if (cqe->validity_iteration_count != sw_validity_iteration_count) + return NULL; + + /* ensure cqe content is read after cqe ownership bit/validity byte */ + dma_rmb(); + + return cqe; +} + +static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) +{ + return (u32)wq->fbc.sz_m1 + 1; +} + +static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq) +{ + return wq->cur_sz == wq->fbc.sz_m1; +} + +static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq) +{ + return !wq->cur_sz; +} + +static inline int mlx5_wq_ll_missing(struct mlx5_wq_ll *wq) +{ + return wq->fbc.sz_m1 - wq->cur_sz; +} + +static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix) +{ + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); +} + +static inline u16 mlx5_wq_ll_get_wqe_next_ix(struct mlx5_wq_ll *wq, u16 ix) +{ + struct mlx5_wqe_srq_next_seg *wqe = mlx5_wq_ll_get_wqe(wq, ix); + + return be16_to_cpu(wqe->next_wqe_index); +} + +static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next) +{ + wq->head = head_next; + wq->wqe_ctr++; + wq->cur_sz++; +} + +static inline void mlx5_wq_ll_pop(struct mlx5_wq_ll *wq, __be16 ix, + __be16 *next_tail_next) +{ + *wq->tail_next = ix; + wq->tail_next = next_tail_next; + wq->cur_sz--; +} + +static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq) +{ + *wq->db = cpu_to_be32(wq->wqe_ctr); +} + +static inline u16 mlx5_wq_ll_get_head(struct mlx5_wq_ll *wq) +{ + return wq->head; +} + +static inline u16 mlx5_wq_ll_get_counter(struct mlx5_wq_ll *wq) +{ + return wq->wqe_ctr; +} + +#endif /* __MLX5_WQ_H__ */ -- cgit v1.2.3